<?php
/**
 * Provides string functions for UTF-8 strings
 * 
 * This class is implemented to provide a UTF-8 version of almost every built-in
 * PHP string function. For more information about UTF-8, please visit
 * http://flourishlib.com/docs/UTF-8.
 * 
 * @copyright  Copyright (c) 2008-2011 Will Bond
 * @author     Will Bond [wb] <will@flourishlib.com>
 * @license    http://flourishlib.com/license
 * 
 * @package    Flourish
 * @link       http://flourishlib.com/fUTF8
 * 
 * @version    1.0.0b15
 * @changes    1.0.0b15  Fixed a bug with using IBM's iconv implementation on AIX [wb, 2011-07-29]
 * @changes    1.0.0b14  Added a workaround for iconv having issues in MAMP 1.9.4+ [wb, 2011-07-26]
 * @changes    1.0.0b13  Fixed notices from being thrown when invalid data is sent to ::clean() [wb, 2011-06-10]
 * @changes    1.0.0b12  Fixed a variable name typo in ::sub() [wb, 2011-05-09]
 * @changes    1.0.0b11  Updated the class to not use phpinfo() to determine the iconv implementation [wb, 2010-11-04]
 * @changes    1.0.0b10  Fixed a bug with capitalizing a lowercase i resulting in a dotted upper-case I [wb, 2010-11-01]
 * @changes    1.0.0b9   Updated class to use fCore::startErrorCapture() instead of `error_reporting()` [wb, 2010-08-09]
 * @changes    1.0.0b8   Removed `e` flag from preg_replace() calls [wb, 2010-06-08]
 * @changes    1.0.0b7   Added the methods ::trim(), ::rtrim() and ::ltrim() [wb, 2010-05-11]
 * @changes    1.0.0b6   Fixed ::clean() to work with PHP installs that use an iconv library that doesn't support //IGNORE [wb, 2010-03-02]
 * @changes    1.0.0b5   Changed ::ucwords() to also uppercase words right after various punctuation [wb, 2009-09-18]
 * @changes    1.0.0b4   Changed replacement values in preg_replace() calls to be properly escaped [wb, 2009-06-11]
 * @changes    1.0.0b3   Fixed a parameter name in ::rpos() from `$search` to `$needle` [wb, 2009-02-06]
 * @changes    1.0.0b2   Fixed a bug in ::explode() with newlines and zero-length delimiters [wb, 2009-02-05]
 * @changes    1.0.0b    The initial implementation [wb, 2008-06-01]
 */
class fUTF8
{
	// The following constants allow for nice looking callbacks to static methods.
	// Each constant holds the string 'fUTF8::<name>', where <name> is the name of
	// the constant itself, so a callback can be written as the class constant
	// fUTF8::upper instead of the quoted string 'fUTF8::upper'.
	// NOTE(review): the methods these strings refer to are presumably defined
	// further down in the class - they are not visible from here.
	const ascii    = 'fUTF8::ascii';
	const chr      = 'fUTF8::chr';
	const clean    = 'fUTF8::clean';
	const cmp      = 'fUTF8::cmp';
	const explode  = 'fUTF8::explode';
	const icmp     = 'fUTF8::icmp';
	const inatcmp  = 'fUTF8::inatcmp';
	const ipos     = 'fUTF8::ipos';
	const ireplace = 'fUTF8::ireplace';
	const irpos    = 'fUTF8::irpos';
	const istr     = 'fUTF8::istr';
	const len      = 'fUTF8::len';
	const lower    = 'fUTF8::lower';
	const ltrim    = 'fUTF8::ltrim';
	const natcmp   = 'fUTF8::natcmp';
	const ord      = 'fUTF8::ord';
	const pad      = 'fUTF8::pad';
	const pos      = 'fUTF8::pos';
	const replace  = 'fUTF8::replace';
	const reset    = 'fUTF8::reset';
	const rev      = 'fUTF8::rev';
	const rpos     = 'fUTF8::rpos';
	const rtrim    = 'fUTF8::rtrim';
	const str      = 'fUTF8::str';
	const sub      = 'fUTF8::sub';
	const trim     = 'fUTF8::trim';
	const ucfirst  = 'fUTF8::ucfirst';
	const ucwords  = 'fUTF8::ucwords';
	const upper    = 'fUTF8::upper';
	const wordwrap = 'fUTF8::wordwrap';
	
	
	/**
	 * If the iconv() implementation in use supports the //IGNORE suffix
	 * 
	 * Depending on how things are compiled, NetBSD and Solaris don't support
	 * //IGNORE in iconv(). If //IGNORE support is not provided, strings with
	 * invalid characters will be truncated.
	 * 
	 * The property starts out as NULL.
	 * NOTE(review): presumably NULL means "not yet detected" and the value is
	 * filled in on first use - the detecting code is not visible here.
	 * 
	 * @var boolean
	 */
	static private $can_ignore_invalid = NULL;
	
	/**
	 * All lowercase UTF-8 characters mapped to uppercase characters
	 * 
	 * Covers, in order: basic Latin, Latin-1 Supplement, Latin Extended-A/B
	 * (with their IPA lowercase counterparts), Greek, Coptic, Cyrillic,
	 * Armenian, Georgian, Latin Extended Additional, Greek Extended, circled
	 * Latin letters and fullwidth Latin letters.
	 * 
	 * Every key must be listed exactly once: PHP keeps only the last value
	 * given for a repeated array key, so a second entry for the same lowercase
	 * character silently replaces the first. For this reason the digraphs
	 * 'ǆ', 'ǉ' and 'ǌ' are mapped only to their uppercase forms ('Ǆ', 'Ǉ',
	 * 'Ǌ') and not to the titlecase forms ('ǅ', 'ǈ', 'ǋ').
	 * 
	 * @var array
	 */
	static private $lower_to_upper = array(
		'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
		'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
		'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
		's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
		'y' => 'Y', 'z' => 'Z', 'à' => 'À', 'á' => 'Á', 'â' => 'Â', 'ã' => 'Ã',
		'ä' => 'Ä', 'å' => 'Å', 'æ' => 'Æ', 'ç' => 'Ç', 'è' => 'È', 'é' => 'É',
		'ê' => 'Ê', 'ë' => 'Ë', 'ì' => 'Ì', 'í' => 'Í', 'î' => 'Î', 'ï' => 'Ï',
		'ð' => 'Ð', 'ñ' => 'Ñ', 'ò' => 'Ò', 'ó' => 'Ó', 'ô' => 'Ô', 'õ' => 'Õ',
		'ö' => 'Ö', 'ø' => 'Ø', 'ù' => 'Ù', 'ú' => 'Ú', 'û' => 'Û', 'ü' => 'Ü',
		'ý' => 'Ý', 'þ' => 'Þ', 'ā' => 'Ā', 'ă' => 'Ă', 'ą' => 'Ą', 'ć' => 'Ć',
		'ĉ' => 'Ĉ', 'ċ' => 'Ċ', 'č' => 'Č', 'ď' => 'Ď', 'đ' => 'Đ', 'ē' => 'Ē',
		'ĕ' => 'Ĕ', 'ė' => 'Ė', 'ę' => 'Ę', 'ě' => 'Ě', 'ĝ' => 'Ĝ', 'ğ' => 'Ğ',
		'ġ' => 'Ġ', 'ģ' => 'Ģ', 'ĥ' => 'Ĥ', 'ħ' => 'Ħ', 'ĩ' => 'Ĩ', 'ī' => 'Ī',
		'ĭ' => 'Ĭ', 'į' => 'Į', 'ĳ' => 'Ĳ', 'ĵ' => 'Ĵ', 'ķ' => 'Ķ', 'ĺ' => 'Ĺ',
		'ļ' => 'Ļ', 'ľ' => 'Ľ', 'ŀ' => 'Ŀ', 'ł' => 'Ł', 'ń' => 'Ń', 'ņ' => 'Ņ',
		'ň' => 'Ň', 'ŋ' => 'Ŋ', 'ō' => 'Ō', 'ŏ' => 'Ŏ', 'ő' => 'Ő', 'œ' => 'Œ',
		'ŕ' => 'Ŕ', 'ŗ' => 'Ŗ', 'ř' => 'Ř', 'ś' => 'Ś', 'ŝ' => 'Ŝ', 'ş' => 'Ş',
		'š' => 'Š', 'ţ' => 'Ţ', 'ť' => 'Ť', 'ŧ' => 'Ŧ', 'ũ' => 'Ũ', 'ū' => 'Ū',
		'ŭ' => 'Ŭ', 'ů' => 'Ů', 'ű' => 'Ű', 'ų' => 'Ų', 'ŵ' => 'Ŵ', 'ŷ' => 'Ŷ',
		'ÿ' => 'Ÿ', 'ź' => 'Ź', 'ż' => 'Ż', 'ž' => 'Ž', 'ɓ' => 'Ɓ', 'ƃ' => 'Ƃ',
		'ƅ' => 'Ƅ', 'ɔ' => 'Ɔ', 'ƈ' => 'Ƈ', 'ɗ' => 'Ɗ', 'ƌ' => 'Ƌ', 'ɘ' => 'Ǝ',
		'ə' => 'Ə', 'ɛ' => 'Ɛ', 'ƒ' => 'Ƒ', 'ɠ' => 'Ɠ', 'ɣ' => 'Ɣ', 'ɩ' => 'Ɩ',
		'ɨ' => 'Ɨ', 'ƙ' => 'Ƙ', 'ɯ' => 'Ɯ', 'ɲ' => 'Ɲ', 'ɵ' => 'Ɵ', 'ơ' => 'Ơ',
		'ƣ' => 'Ƣ', 'ƥ' => 'Ƥ', 'ƨ' => 'Ƨ', 'ʃ' => 'Ʃ', 'ƭ' => 'Ƭ', 'ʈ' => 'Ʈ',
		'ư' => 'Ư', 'ʊ' => 'Ʊ', 'ʋ' => 'Ʋ', 'ƴ' => 'Ƴ', 'ƶ' => 'Ƶ', 'ʒ' => 'Ʒ',
		// 'ǆ', 'ǉ' and 'ǌ' map to the uppercase digraphs only; listing them a
		// second time with the titlecase forms would override the uppercase value
		'ƹ' => 'Ƹ', 'ƽ' => 'Ƽ', 'ǆ' => 'Ǆ', 'ǉ' => 'Ǉ',
		'ǌ' => 'Ǌ', 'ǎ' => 'Ǎ', 'ǐ' => 'Ǐ', 'ǒ' => 'Ǒ', 'ǔ' => 'Ǔ',
		'ǖ' => 'Ǖ', 'ǘ' => 'Ǘ', 'ǚ' => 'Ǚ', 'ǜ' => 'Ǜ', 'ǟ' => 'Ǟ', 'ǡ' => 'Ǡ',
		'ǣ' => 'Ǣ', 'ǥ' => 'Ǥ', 'ǧ' => 'Ǧ', 'ǩ' => 'Ǩ', 'ǫ' => 'Ǫ', 'ǭ' => 'Ǭ',
		'ǯ' => 'Ǯ', 'ǳ' => 'Ǳ', 'ǵ' => 'Ǵ', 'ǻ' => 'Ǻ', 'ǽ' => 'Ǽ', 'ǿ' => 'Ǿ',
		'ȁ' => 'Ȁ', 'ȃ' => 'Ȃ', 'ȅ' => 'Ȅ', 'ȇ' => 'Ȇ', 'ȉ' => 'Ȉ', 'ȋ' => 'Ȋ',
		'ȍ' => 'Ȍ', 'ȏ' => 'Ȏ', 'ȑ' => 'Ȑ', 'ȓ' => 'Ȓ', 'ȕ' => 'Ȕ', 'ȗ' => 'Ȗ',
		'ά' => 'Ά', 'έ' => 'Έ', 'ή' => 'Ή', 'ί' => 'Ί', 'ό' => 'Ό', 'ύ' => 'Ύ',
		'ώ' => 'Ώ', 'α' => 'Α', 'β' => 'Β', 'γ' => 'Γ', 'δ' => 'Δ', 'ε' => 'Ε',
		'ζ' => 'Ζ', 'η' => 'Η', 'θ' => 'Θ', 'ι' => 'Ι', 'κ' => 'Κ', 'λ' => 'Λ',
		'μ' => 'Μ', 'ν' => 'Ν', 'ξ' => 'Ξ', 'ο' => 'Ο', 'π' => 'Π', 'ρ' => 'Ρ',
		'σ' => 'Σ', 'τ' => 'Τ', 'υ' => 'Υ', 'φ' => 'Φ', 'χ' => 'Χ', 'ψ' => 'Ψ',
		'ω' => 'Ω', 'ϊ' => 'Ϊ', 'ϋ' => 'Ϋ', 'ϣ' => 'Ϣ', 'ϥ' => 'Ϥ', 'ϧ' => 'Ϧ',
		'ϩ' => 'Ϩ', 'ϫ' => 'Ϫ', 'ϭ' => 'Ϭ', 'ϯ' => 'Ϯ', 'ё' => 'Ё', 'ђ' => 'Ђ',
		'ѓ' => 'Ѓ', 'є' => 'Є', 'ѕ' => 'Ѕ', 'і' => 'І', 'ї' => 'Ї', 'ј' => 'Ј',
		'љ' => 'Љ', 'њ' => 'Њ', 'ћ' => 'Ћ', 'ќ' => 'Ќ', 'ў' => 'Ў', 'џ' => 'Џ',
		'а' => 'А', 'б' => 'Б', 'в' => 'В', 'г' => 'Г', 'д' => 'Д', 'е' => 'Е',
		'ж' => 'Ж', 'з' => 'З', 'и' => 'И', 'й' => 'Й', 'к' => 'К', 'л' => 'Л',
		'м' => 'М', 'н' => 'Н', 'о' => 'О', 'п' => 'П', 'р' => 'Р', 'с' => 'С',
		'т' => 'Т', 'у' => 'У', 'ф' => 'Ф', 'х' => 'Х', 'ц' => 'Ц', 'ч' => 'Ч',
		'ш' => 'Ш', 'щ' => 'Щ', 'ъ' => 'Ъ', 'ы' => 'Ы', 'ь' => 'Ь', 'э' => 'Э',
		'ю' => 'Ю', 'я' => 'Я', 'ѡ' => 'Ѡ', 'ѣ' => 'Ѣ', 'ѥ' => 'Ѥ', 'ѧ' => 'Ѧ',
		'ѩ' => 'Ѩ', 'ѫ' => 'Ѫ', 'ѭ' => 'Ѭ', 'ѯ' => 'Ѯ', 'ѱ' => 'Ѱ', 'ѳ' => 'Ѳ',
		'ѵ' => 'Ѵ', 'ѷ' => 'Ѷ', 'ѹ' => 'Ѹ', 'ѻ' => 'Ѻ', 'ѽ' => 'Ѽ', 'ѿ' => 'Ѿ',
		'ҁ' => 'Ҁ', 'ґ' => 'Ґ', 'ғ' => 'Ғ', 'ҕ' => 'Ҕ', 'җ' => 'Җ', 'ҙ' => 'Ҙ',
		'қ' => 'Қ', 'ҝ' => 'Ҝ', 'ҟ' => 'Ҟ', 'ҡ' => 'Ҡ', 'ң' => 'Ң', 'ҥ' => 'Ҥ',
		'ҧ' => 'Ҧ', 'ҩ' => 'Ҩ', 'ҫ' => 'Ҫ', 'ҭ' => 'Ҭ', 'ү' => 'Ү', 'ұ' => 'Ұ',
		'ҳ' => 'Ҳ', 'ҵ' => 'Ҵ', 'ҷ' => 'Ҷ', 'ҹ' => 'Ҹ', 'һ' => 'Һ', 'ҽ' => 'Ҽ',
		'ҿ' => 'Ҿ', 'ӂ' => 'Ӂ', 'ӄ' => 'Ӄ', 'ӈ' => 'Ӈ', 'ӌ' => 'Ӌ', 'ӑ' => 'Ӑ',
		'ӓ' => 'Ӓ', 'ӕ' => 'Ӕ', 'ӗ' => 'Ӗ', 'ә' => 'Ә', 'ӛ' => 'Ӛ', 'ӝ' => 'Ӝ',
		'ӟ' => 'Ӟ', 'ӡ' => 'Ӡ', 'ӣ' => 'Ӣ', 'ӥ' => 'Ӥ', 'ӧ' => 'Ӧ', 'ө' => 'Ө',
		'ӫ' => 'Ӫ', 'ӯ' => 'Ӯ', 'ӱ' => 'Ӱ', 'ӳ' => 'Ӳ', 'ӵ' => 'Ӵ', 'ӹ' => 'Ӹ',
		'ա' => 'Ա', 'բ' => 'Բ', 'գ' => 'Գ', 'դ' => 'Դ', 'ե' => 'Ե', 'զ' => 'Զ',
		'է' => 'Է', 'ը' => 'Ը', 'թ' => 'Թ', 'ժ' => 'Ժ', 'ի' => 'Ի', 'լ' => 'Լ',
		'խ' => 'Խ', 'ծ' => 'Ծ', 'կ' => 'Կ', 'հ' => 'Հ', 'ձ' => 'Ձ', 'ղ' => 'Ղ',
		'ճ' => 'Ճ', 'մ' => 'Մ', 'յ' => 'Յ', 'ն' => 'Ն', 'շ' => 'Շ', 'ո' => 'Ո',
		'չ' => 'Չ', 'պ' => 'Պ', 'ջ' => 'Ջ', 'ռ' => 'Ռ', 'ս' => 'Ս', 'վ' => 'Վ',
		'տ' => 'Տ', 'ր' => 'Ր', 'ց' => 'Ց', 'ւ' => 'Ւ', 'փ' => 'Փ', 'ք' => 'Ք',
		'օ' => 'Օ', 'ֆ' => 'Ֆ', 'ა' => 'Ⴀ', 'ბ' => 'Ⴁ', 'გ' => 'Ⴂ', 'დ' => 'Ⴃ',
		'ე' => 'Ⴄ', 'ვ' => 'Ⴅ', 'ზ' => 'Ⴆ', 'თ' => 'Ⴇ', 'ი' => 'Ⴈ', 'კ' => 'Ⴉ',
		'ლ' => 'Ⴊ', 'მ' => 'Ⴋ', 'ნ' => 'Ⴌ', 'ო' => 'Ⴍ', 'პ' => 'Ⴎ', 'ჟ' => 'Ⴏ',
		'რ' => 'Ⴐ', 'ს' => 'Ⴑ', 'ტ' => 'Ⴒ', 'უ' => 'Ⴓ', 'ფ' => 'Ⴔ', 'ქ' => 'Ⴕ',
		'ღ' => 'Ⴖ', 'ყ' => 'Ⴗ', 'შ' => 'Ⴘ', 'ჩ' => 'Ⴙ', 'ც' => 'Ⴚ', 'ძ' => 'Ⴛ',
		'წ' => 'Ⴜ', 'ჭ' => 'Ⴝ', 'ხ' => 'Ⴞ', 'ჯ' => 'Ⴟ', 'ჰ' => 'Ⴠ', 'ჱ' => 'Ⴡ',
		'ჲ' => 'Ⴢ', 'ჳ' => 'Ⴣ', 'ჴ' => 'Ⴤ', 'ჵ' => 'Ⴥ', 'ḁ' => 'Ḁ', 'ḃ' => 'Ḃ',
		'ḅ' => 'Ḅ', 'ḇ' => 'Ḇ', 'ḉ' => 'Ḉ', 'ḋ' => 'Ḋ', 'ḍ' => 'Ḍ', 'ḏ' => 'Ḏ',
		'ḑ' => 'Ḑ', 'ḓ' => 'Ḓ', 'ḕ' => 'Ḕ', 'ḗ' => 'Ḗ', 'ḙ' => 'Ḙ', 'ḛ' => 'Ḛ',
		'ḝ' => 'Ḝ', 'ḟ' => 'Ḟ', 'ḡ' => 'Ḡ', 'ḣ' => 'Ḣ', 'ḥ' => 'Ḥ', 'ḧ' => 'Ḧ',
		'ḩ' => 'Ḩ', 'ḫ' => 'Ḫ', 'ḭ' => 'Ḭ', 'ḯ' => 'Ḯ', 'ḱ' => 'Ḱ', 'ḳ' => 'Ḳ',
		'ḵ' => 'Ḵ', 'ḷ' => 'Ḷ', 'ḹ' => 'Ḹ', 'ḻ' => 'Ḻ', 'ḽ' => 'Ḽ', 'ḿ' => 'Ḿ',
		'ṁ' => 'Ṁ', 'ṃ' => 'Ṃ', 'ṅ' => 'Ṅ', 'ṇ' => 'Ṇ', 'ṉ' => 'Ṉ', 'ṋ' => 'Ṋ',
		'ṍ' => 'Ṍ', 'ṏ' => 'Ṏ', 'ṑ' => 'Ṑ', 'ṓ' => 'Ṓ', 'ṕ' => 'Ṕ', 'ṗ' => 'Ṗ',
		'ṙ' => 'Ṙ', 'ṛ' => 'Ṛ', 'ṝ' => 'Ṝ', 'ṟ' => 'Ṟ', 'ṡ' => 'Ṡ', 'ṣ' => 'Ṣ',
		'ṥ' => 'Ṥ', 'ṧ' => 'Ṧ', 'ṩ' => 'Ṩ', 'ṫ' => 'Ṫ', 'ṭ' => 'Ṭ', 'ṯ' => 'Ṯ',
		'ṱ' => 'Ṱ', 'ṳ' => 'Ṳ', 'ṵ' => 'Ṵ', 'ṷ' => 'Ṷ', 'ṹ' => 'Ṹ', 'ṻ' => 'Ṻ',
		'ṽ' => 'Ṽ', 'ṿ' => 'Ṿ', 'ẁ' => 'Ẁ', 'ẃ' => 'Ẃ', 'ẅ' => 'Ẅ', 'ẇ' => 'Ẇ',
		'ẉ' => 'Ẉ', 'ẋ' => 'Ẋ', 'ẍ' => 'Ẍ', 'ẏ' => 'Ẏ', 'ẑ' => 'Ẑ', 'ẓ' => 'Ẓ',
		'ẕ' => 'Ẕ', 'ạ' => 'Ạ', 'ả' => 'Ả', 'ấ' => 'Ấ', 'ầ' => 'Ầ', 'ẩ' => 'Ẩ',
		'ẫ' => 'Ẫ', 'ậ' => 'Ậ', 'ắ' => 'Ắ', 'ằ' => 'Ằ', 'ẳ' => 'Ẳ', 'ẵ' => 'Ẵ',
		'ặ' => 'Ặ', 'ẹ' => 'Ẹ', 'ẻ' => 'Ẻ', 'ẽ' => 'Ẽ', 'ế' => 'Ế', 'ề' => 'Ề',
		'ể' => 'Ể', 'ễ' => 'Ễ', 'ệ' => 'Ệ', 'ỉ' => 'Ỉ', 'ị' => 'Ị', 'ọ' => 'Ọ',
		'ỏ' => 'Ỏ', 'ố' => 'Ố', 'ồ' => 'Ồ', 'ổ' => 'Ổ', 'ỗ' => 'Ỗ', 'ộ' => 'Ộ',
		'ớ' => 'Ớ', 'ờ' => 'Ờ', 'ở' => 'Ở', 'ỡ' => 'Ỡ', 'ợ' => 'Ợ', 'ụ' => 'Ụ',
		'ủ' => 'Ủ', 'ứ' => 'Ứ', 'ừ' => 'Ừ', 'ử' => 'Ử', 'ữ' => 'Ữ', 'ự' => 'Ự',
		'ỳ' => 'Ỳ', 'ỵ' => 'Ỵ', 'ỷ' => 'Ỷ', 'ỹ' => 'Ỹ', 'ἀ' => 'Ἀ', 'ἁ' => 'Ἁ',
		'ἂ' => 'Ἂ', 'ἃ' => 'Ἃ', 'ἄ' => 'Ἄ', 'ἅ' => 'Ἅ', 'ἆ' => 'Ἆ', 'ἇ' => 'Ἇ',
		'ἐ' => 'Ἐ', 'ἑ' => 'Ἑ', 'ἒ' => 'Ἒ', 'ἓ' => 'Ἓ', 'ἔ' => 'Ἔ', 'ἕ' => 'Ἕ',
		'ἠ' => 'Ἠ', 'ἡ' => 'Ἡ', 'ἢ' => 'Ἢ', 'ἣ' => 'Ἣ', 'ἤ' => 'Ἤ', 'ἥ' => 'Ἥ',
		'ἦ' => 'Ἦ', 'ἧ' => 'Ἧ', 'ἰ' => 'Ἰ', 'ἱ' => 'Ἱ', 'ἲ' => 'Ἲ', 'ἳ' => 'Ἳ',
		'ἴ' => 'Ἴ', 'ἵ' => 'Ἵ', 'ἶ' => 'Ἶ', 'ἷ' => 'Ἷ', 'ὀ' => 'Ὀ', 'ὁ' => 'Ὁ',
		'ὂ' => 'Ὂ', 'ὃ' => 'Ὃ', 'ὄ' => 'Ὄ', 'ὅ' => 'Ὅ', 'ὑ' => 'Ὑ', 'ὓ' => 'Ὓ',
		'ὕ' => 'Ὕ', 'ὗ' => 'Ὗ', 'ὠ' => 'Ὠ', 'ὡ' => 'Ὡ', 'ὢ' => 'Ὢ', 'ὣ' => 'Ὣ',
		'ὤ' => 'Ὤ', 'ὥ' => 'Ὥ', 'ὦ' => 'Ὦ', 'ὧ' => 'Ὧ', 'ᾀ' => 'ᾈ', 'ᾁ' => 'ᾉ',
		'ᾂ' => 'ᾊ', 'ᾃ' => 'ᾋ', 'ᾄ' => 'ᾌ', 'ᾅ' => 'ᾍ', 'ᾆ' => 'ᾎ', 'ᾇ' => 'ᾏ',
		'ᾐ' => 'ᾘ', 'ᾑ' => 'ᾙ', 'ᾒ' => 'ᾚ', 'ᾓ' => 'ᾛ', 'ᾔ' => 'ᾜ', 'ᾕ' => 'ᾝ',
		'ᾖ' => 'ᾞ', 'ᾗ' => 'ᾟ', 'ᾠ' => 'ᾨ', 'ᾡ' => 'ᾩ', 'ᾢ' => 'ᾪ', 'ᾣ' => 'ᾫ',
		'ᾤ' => 'ᾬ', 'ᾥ' => 'ᾭ', 'ᾦ' => 'ᾮ', 'ᾧ' => 'ᾯ', 'ᾰ' => 'Ᾰ', 'ᾱ' => 'Ᾱ',
		'ῐ' => 'Ῐ', 'ῑ' => 'Ῑ', 'ῠ' => 'Ῠ', 'ῡ' => 'Ῡ', 'ⓐ' => 'Ⓐ', 'ⓑ' => 'Ⓑ',
		'ⓒ' => 'Ⓒ', 'ⓓ' => 'Ⓓ', 'ⓔ' => 'Ⓔ', 'ⓕ' => 'Ⓕ', 'ⓖ' => 'Ⓖ', 'ⓗ' => 'Ⓗ',
		'ⓘ' => 'Ⓘ', 'ⓙ' => 'Ⓙ', 'ⓚ' => 'Ⓚ', 'ⓛ' => 'Ⓛ', 'ⓜ' => 'Ⓜ', 'ⓝ' => 'Ⓝ',
		'ⓞ' => 'Ⓞ', 'ⓟ' => 'Ⓟ', 'ⓠ' => 'Ⓠ', 'ⓡ' => 'Ⓡ', 'ⓢ' => 'Ⓢ', 'ⓣ' => 'Ⓣ',
		'ⓤ' => 'Ⓤ', 'ⓥ' => 'Ⓥ', 'ⓦ' => 'Ⓦ', 'ⓧ' => 'Ⓧ', 'ⓨ' => 'Ⓨ', 'ⓩ' => 'Ⓩ',
		'ａ' => 'Ａ', 'ｂ' => 'Ｂ', 'ｃ' => 'Ｃ', 'ｄ' => 'Ｄ', 'ｅ' => 'Ｅ', 'ｆ' => 'Ｆ',
		'ｇ' => 'Ｇ', 'ｈ' => 'Ｈ', 'ｉ' => 'Ｉ', 'ｊ' => 'Ｊ', 'ｋ' => 'Ｋ', 'ｌ' => 'Ｌ',
		'ｍ' => 'Ｍ', 'ｎ' => 'Ｎ', 'ｏ' => 'Ｏ', 'ｐ' => 'Ｐ', 'ｑ' => 'Ｑ', 'ｒ' => 'Ｒ',
		'ｓ' => 'Ｓ', 'ｔ' => 'Ｔ', 'ｕ' => 'Ｕ', 'ｖ' => 'Ｖ', 'ｗ' => 'Ｗ', 'ｘ' => 'Ｘ',
		'ｙ' => 'Ｙ', 'ｚ' => 'Ｚ'
	);
	
	/**
	 * All lowercase UTF-8 characters not properly handled by [http://php.net/mb_strtoupper mb_strtoupper()] mapped to uppercase characters
	 * 
	 * The entries are, in order: 'ɘ', the titlecase digraph 'ǲ', the Georgian
	 * letters 'ა' through 'ჵ' and the circled Latin letters 'ⓐ' through 'ⓩ'.
	 * 
	 * NOTE(review): presumably applied as a correction alongside
	 * mb_strtoupper() when the mbstring extension is available - the code
	 * that consumes this table is not visible here.
	 * 
	 * @var array
	 */
	static private $mb_lower_to_upper_fix = array(
		'ɘ' => 'Ǝ', 'ǲ' => 'Ǳ', 'ა' => 'Ⴀ', 'ბ' => 'Ⴁ', 'გ' => 'Ⴂ', 'დ' => 'Ⴃ',
		'ე' => 'Ⴄ', 'ვ' => 'Ⴅ', 'ზ' => 'Ⴆ', 'თ' => 'Ⴇ', 'ი' => 'Ⴈ', 'კ' => 'Ⴉ',
		'ლ' => 'Ⴊ', 'მ' => 'Ⴋ', 'ნ' => 'Ⴌ', 'ო' => 'Ⴍ', 'პ' => 'Ⴎ', 'ჟ' => 'Ⴏ',
		'რ' => 'Ⴐ', 'ს' => 'Ⴑ', 'ტ' => 'Ⴒ', 'უ' => 'Ⴓ', 'ფ' => 'Ⴔ', 'ქ' => 'Ⴕ',
		'ღ' => 'Ⴖ', 'ყ' => 'Ⴗ', 'შ' => 'Ⴘ', 'ჩ' => 'Ⴙ', 'ც' => 'Ⴚ', 'ძ' => 'Ⴛ',
		'წ' => 'Ⴜ', 'ჭ' => 'Ⴝ', 'ხ' => 'Ⴞ', 'ჯ' => 'Ⴟ', 'ჰ' => 'Ⴠ', 'ჱ' => 'Ⴡ',
		'ჲ' => 'Ⴢ', 'ჳ' => 'Ⴣ', 'ჴ' => 'Ⴤ', 'ჵ' => 'Ⴥ', 'ⓐ' => 'Ⓐ', 'ⓑ' => 'Ⓑ',
		'ⓒ' => 'Ⓒ', 'ⓓ' => 'Ⓓ', 'ⓔ' => 'Ⓔ', 'ⓕ' => 'Ⓕ', 'ⓖ' => 'Ⓖ', 'ⓗ' => 'Ⓗ',
		'ⓘ' => 'Ⓘ', 'ⓙ' => 'Ⓙ', 'ⓚ' => 'Ⓚ', 'ⓛ' => 'Ⓛ', 'ⓜ' => 'Ⓜ', 'ⓝ' => 'Ⓝ',
		'ⓞ' => 'Ⓞ', 'ⓟ' => 'Ⓟ', 'ⓠ' => 'Ⓠ', 'ⓡ' => 'Ⓡ', 'ⓢ' => 'Ⓢ', 'ⓣ' => 'Ⓣ',
		'ⓤ' => 'Ⓤ', 'ⓥ' => 'Ⓥ', 'ⓦ' => 'Ⓦ', 'ⓧ' => 'Ⓧ', 'ⓨ' => 'Ⓨ', 'ⓩ' => 'Ⓩ'
	);
	
	/**
	 * All uppercase UTF-8 characters not properly handled by [http://php.net/mb_strtolower mb_strtolower()] mapped to lowercase characters
	 * 
	 * The entries are, in order: 'ǝ', the titlecase digraphs 'ǅ', 'ǈ' and 'ǋ',
	 * the Georgian letters 'Ⴀ' through 'Ⴥ', the Greek capitals with
	 * prosgegrammeni 'ᾈ' through 'ᾯ' and the circled Latin letters 'Ⓐ'
	 * through 'Ⓩ'.
	 * 
	 * NOTE(review): the first key, 'ǝ', is itself a lowercase letter that is
	 * mapped to 'ɘ', whereas $upper_to_lower uses the key 'Ǝ' for the same
	 * target - confirm this is intentional (e.g. correcting the output of
	 * mb_strtolower()) rather than a typo. The code that consumes this table
	 * is not visible here.
	 * 
	 * @var array
	 */
	static private $mb_upper_to_lower_fix = array(
		'ǝ' => 'ɘ', 'ǅ' => 'ǆ', 'ǈ' => 'ǉ', 'ǋ' => 'ǌ', 'Ⴀ' => 'ა', 'Ⴁ' => 'ბ',
		'Ⴂ' => 'გ', 'Ⴃ' => 'დ', 'Ⴄ' => 'ე', 'Ⴅ' => 'ვ', 'Ⴆ' => 'ზ', 'Ⴇ' => 'თ',
		'Ⴈ' => 'ი', 'Ⴉ' => 'კ', 'Ⴊ' => 'ლ', 'Ⴋ' => 'მ', 'Ⴌ' => 'ნ', 'Ⴍ' => 'ო',
		'Ⴎ' => 'პ', 'Ⴏ' => 'ჟ', 'Ⴐ' => 'რ', 'Ⴑ' => 'ს', 'Ⴒ' => 'ტ', 'Ⴓ' => 'უ',
		'Ⴔ' => 'ფ', 'Ⴕ' => 'ქ', 'Ⴖ' => 'ღ', 'Ⴗ' => 'ყ', 'Ⴘ' => 'შ', 'Ⴙ' => 'ჩ',
		'Ⴚ' => 'ც', 'Ⴛ' => 'ძ', 'Ⴜ' => 'წ', 'Ⴝ' => 'ჭ', 'Ⴞ' => 'ხ', 'Ⴟ' => 'ჯ',
		'Ⴠ' => 'ჰ', 'Ⴡ' => 'ჱ', 'Ⴢ' => 'ჲ', 'Ⴣ' => 'ჳ', 'Ⴤ' => 'ჴ', 'Ⴥ' => 'ჵ',
		'ᾈ' => 'ᾀ', 'ᾉ' => 'ᾁ', 'ᾊ' => 'ᾂ', 'ᾋ' => 'ᾃ', 'ᾌ' => 'ᾄ', 'ᾍ' => 'ᾅ',
		'ᾎ' => 'ᾆ', 'ᾏ' => 'ᾇ', 'ᾘ' => 'ᾐ', 'ᾙ' => 'ᾑ', 'ᾚ' => 'ᾒ', 'ᾛ' => 'ᾓ',
		'ᾜ' => 'ᾔ', 'ᾝ' => 'ᾕ', 'ᾞ' => 'ᾖ', 'ᾟ' => 'ᾗ', 'ᾨ' => 'ᾠ', 'ᾩ' => 'ᾡ',
		'ᾪ' => 'ᾢ', 'ᾫ' => 'ᾣ', 'ᾬ' => 'ᾤ', 'ᾭ' => 'ᾥ', 'ᾮ' => 'ᾦ', 'ᾯ' => 'ᾧ',
		'Ⓐ' => 'ⓐ', 'Ⓑ' => 'ⓑ', 'Ⓒ' => 'ⓒ', 'Ⓓ' => 'ⓓ', 'Ⓔ' => 'ⓔ', 'Ⓕ' => 'ⓕ',
		'Ⓖ' => 'ⓖ', 'Ⓗ' => 'ⓗ', 'Ⓘ' => 'ⓘ', 'Ⓙ' => 'ⓙ', 'Ⓚ' => 'ⓚ', 'Ⓛ' => 'ⓛ',
		'Ⓜ' => 'ⓜ', 'Ⓝ' => 'ⓝ', 'Ⓞ' => 'ⓞ', 'Ⓟ' => 'ⓟ', 'Ⓠ' => 'ⓠ', 'Ⓡ' => 'ⓡ',
		'Ⓢ' => 'ⓢ', 'Ⓣ' => 'ⓣ', 'Ⓤ' => 'ⓤ', 'Ⓥ' => 'ⓥ', 'Ⓦ' => 'ⓦ', 'Ⓧ' => 'ⓧ',
		'Ⓨ' => 'ⓨ', 'Ⓩ' => 'ⓩ'
	);
	
	/**
	 * All uppercase UTF-8 characters mapped to lowercase characters
	 * 
	 * Covers, in order: basic Latin, Latin-1 Supplement, Latin Extended-A/B,
	 * Greek, Coptic, Cyrillic, Armenian, Georgian, Latin Extended Additional,
	 * Greek Extended, circled Latin letters and fullwidth Latin letters.
	 * 
	 * Both the uppercase and the titlecase digraphs ('Ǆ' and 'ǅ', 'Ǉ' and 'ǈ',
	 * 'Ǌ' and 'ǋ') map to the same lowercase digraph. The dotted capital 'İ'
	 * maps to a plain ASCII 'i'.
	 * 
	 * @var array
	 */
	static private $upper_to_lower = array(
		'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', 'F' => 'f',
		'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k', 'L' => 'l',
		'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q', 'R' => 'r',
		'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x',
		'Y' => 'y', 'Z' => 'z', 'À' => 'à', 'Á' => 'á', 'Â' => 'â', 'Ã' => 'ã',
		'Ä' => 'ä', 'Å' => 'å', 'Æ' => 'æ', 'Ç' => 'ç', 'È' => 'è', 'É' => 'é',
		'Ê' => 'ê', 'Ë' => 'ë', 'Ì' => 'ì', 'Í' => 'í', 'Î' => 'î', 'Ï' => 'ï',
		'Ð' => 'ð', 'Ñ' => 'ñ', 'Ò' => 'ò', 'Ó' => 'ó', 'Ô' => 'ô', 'Õ' => 'õ',
		'Ö' => 'ö', 'Ø' => 'ø', 'Ù' => 'ù', 'Ú' => 'ú', 'Û' => 'û', 'Ü' => 'ü',
		'Ý' => 'ý', 'Þ' => 'þ', 'Ā' => 'ā', 'Ă' => 'ă', 'Ą' => 'ą', 'Ć' => 'ć',
		'Ĉ' => 'ĉ', 'Ċ' => 'ċ', 'Č' => 'č', 'Ď' => 'ď', 'Đ' => 'đ', 'Ē' => 'ē',
		'Ĕ' => 'ĕ', 'Ė' => 'ė', 'Ę' => 'ę', 'Ě' => 'ě', 'Ĝ' => 'ĝ', 'Ğ' => 'ğ',
		'Ġ' => 'ġ', 'Ģ' => 'ģ', 'Ĥ' => 'ĥ', 'Ħ' => 'ħ', 'Ĩ' => 'ĩ', 'Ī' => 'ī',
		'Ĭ' => 'ĭ', 'Į' => 'į', 'İ' => 'i', 'Ĳ' => 'ĳ', 'Ĵ' => 'ĵ', 'Ķ' => 'ķ',
		'Ĺ' => 'ĺ', 'Ļ' => 'ļ', 'Ľ' => 'ľ', 'Ŀ' => 'ŀ', 'Ł' => 'ł', 'Ń' => 'ń',
		'Ņ' => 'ņ', 'Ň' => 'ň', 'Ŋ' => 'ŋ', 'Ō' => 'ō', 'Ŏ' => 'ŏ', 'Ő' => 'ő',
		'Œ' => 'œ', 'Ŕ' => 'ŕ', 'Ŗ' => 'ŗ', 'Ř' => 'ř', 'Ś' => 'ś', 'Ŝ' => 'ŝ',
		'Ş' => 'ş', 'Š' => 'š', 'Ţ' => 'ţ', 'Ť' => 'ť', 'Ŧ' => 'ŧ', 'Ũ' => 'ũ',
		'Ū' => 'ū', 'Ŭ' => 'ŭ', 'Ů' => 'ů', 'Ű' => 'ű', 'Ų' => 'ų', 'Ŵ' => 'ŵ',
		'Ŷ' => 'ŷ', 'Ÿ' => 'ÿ', 'Ź' => 'ź', 'Ż' => 'ż', 'Ž' => 'ž', 'Ɓ' => 'ɓ',
		'Ƃ' => 'ƃ', 'Ƅ' => 'ƅ', 'Ɔ' => 'ɔ', 'Ƈ' => 'ƈ', 'Ɗ' => 'ɗ', 'Ƌ' => 'ƌ',
		'Ǝ' => 'ɘ', 'Ə' => 'ə', 'Ɛ' => 'ɛ', 'Ƒ' => 'ƒ', 'Ɠ' => 'ɠ', 'Ɣ' => 'ɣ',
		'Ɩ' => 'ɩ', 'Ɨ' => 'ɨ', 'Ƙ' => 'ƙ', 'Ɯ' => 'ɯ', 'Ɲ' => 'ɲ', 'Ɵ' => 'ɵ',
		'Ơ' => 'ơ', 'Ƣ' => 'ƣ', 'Ƥ' => 'ƥ', 'Ƨ' => 'ƨ', 'Ʃ' => 'ʃ', 'Ƭ' => 'ƭ',
		'Ʈ' => 'ʈ', 'Ư' => 'ư', 'Ʊ' => 'ʊ', 'Ʋ' => 'ʋ', 'Ƴ' => 'ƴ', 'Ƶ' => 'ƶ',
		'Ʒ' => 'ʒ', 'Ƹ' => 'ƹ', 'Ƽ' => 'ƽ', 'Ǆ' => 'ǆ', 'ǅ' => 'ǆ', 'Ǉ' => 'ǉ',
		'ǈ' => 'ǉ', 'Ǌ' => 'ǌ', 'ǋ' => 'ǌ', 'Ǎ' => 'ǎ', 'Ǐ' => 'ǐ', 'Ǒ' => 'ǒ',
		'Ǔ' => 'ǔ', 'Ǖ' => 'ǖ', 'Ǘ' => 'ǘ', 'Ǚ' => 'ǚ', 'Ǜ' => 'ǜ', 'Ǟ' => 'ǟ',
		'Ǡ' => 'ǡ', 'Ǣ' => 'ǣ', 'Ǥ' => 'ǥ', 'Ǧ' => 'ǧ', 'Ǩ' => 'ǩ', 'Ǫ' => 'ǫ',
		'Ǭ' => 'ǭ', 'Ǯ' => 'ǯ', 'Ǳ' => 'ǳ', 'Ǵ' => 'ǵ', 'Ǻ' => 'ǻ', 'Ǽ' => 'ǽ',
		'Ǿ' => 'ǿ', 'Ȁ' => 'ȁ', 'Ȃ' => 'ȃ', 'Ȅ' => 'ȅ', 'Ȇ' => 'ȇ', 'Ȉ' => 'ȉ',
		'Ȋ' => 'ȋ', 'Ȍ' => 'ȍ', 'Ȏ' => 'ȏ', 'Ȑ' => 'ȑ', 'Ȓ' => 'ȓ', 'Ȕ' => 'ȕ',
		'Ȗ' => 'ȗ', 'Ά' => 'ά', 'Έ' => 'έ', 'Ή' => 'ή', 'Ί' => 'ί', 'Ό' => 'ό',
		'Ύ' => 'ύ', 'Ώ' => 'ώ', 'Α' => 'α', 'Β' => 'β', 'Γ' => 'γ', 'Δ' => 'δ',
		'Ε' => 'ε', 'Ζ' => 'ζ', 'Η' => 'η', 'Θ' => 'θ', 'Ι' => 'ι', 'Κ' => 'κ',
		'Λ' => 'λ', 'Μ' => 'μ', 'Ν' => 'ν', 'Ξ' => 'ξ', 'Ο' => 'ο', 'Π' => 'π',
		'Ρ' => 'ρ', 'Σ' => 'σ', 'Τ' => 'τ', 'Υ' => 'υ', 'Φ' => 'φ', 'Χ' => 'χ',
		'Ψ' => 'ψ', 'Ω' => 'ω', 'Ϊ' => 'ϊ', 'Ϋ' => 'ϋ', 'Ϣ' => 'ϣ', 'Ϥ' => 'ϥ',
		'Ϧ' => 'ϧ', 'Ϩ' => 'ϩ', 'Ϫ' => 'ϫ', 'Ϭ' => 'ϭ', 'Ϯ' => 'ϯ', 'Ё' => 'ё',
		'Ђ' => 'ђ', 'Ѓ' => 'ѓ', 'Є' => 'є', 'Ѕ' => 'ѕ', 'І' => 'і', 'Ї' => 'ї',
		'Ј' => 'ј', 'Љ' => 'љ', 'Њ' => 'њ', 'Ћ' => 'ћ', 'Ќ' => 'ќ', 'Ў' => 'ў',
		'Џ' => 'џ', 'А' => 'а', 'Б' => 'б', 'В' => 'в', 'Г' => 'г', 'Д' => 'д',
		'Е' => 'е', 'Ж' => 'ж', 'З' => 'з', 'И' => 'и', 'Й' => 'й', 'К' => 'к',
		'Л' => 'л', 'М' => 'м', 'Н' => 'н', 'О' => 'о', 'П' => 'п', 'Р' => 'р',
		'С' => 'с', 'Т' => 'т', 'У' => 'у', 'Ф' => 'ф', 'Х' => 'х', 'Ц' => 'ц',
		'Ч' => 'ч', 'Ш' => 'ш', 'Щ' => 'щ', 'Ъ' => 'ъ', 'Ы' => 'ы', 'Ь' => 'ь',
		'Э' => 'э', 'Ю' => 'ю', 'Я' => 'я', 'Ѡ' => 'ѡ', 'Ѣ' => 'ѣ', 'Ѥ' => 'ѥ',
		'Ѧ' => 'ѧ', 'Ѩ' => 'ѩ', 'Ѫ' => 'ѫ', 'Ѭ' => 'ѭ', 'Ѯ' => 'ѯ', 'Ѱ' => 'ѱ',
		'Ѳ' => 'ѳ', 'Ѵ' => 'ѵ', 'Ѷ' => 'ѷ', 'Ѹ' => 'ѹ', 'Ѻ' => 'ѻ', 'Ѽ' => 'ѽ',
		'Ѿ' => 'ѿ', 'Ҁ' => 'ҁ', 'Ґ' => 'ґ', 'Ғ' => 'ғ', 'Ҕ' => 'ҕ', 'Җ' => 'җ',
		'Ҙ' => 'ҙ', 'Қ' => 'қ', 'Ҝ' => 'ҝ', 'Ҟ' => 'ҟ', 'Ҡ' => 'ҡ', 'Ң' => 'ң',
		'Ҥ' => 'ҥ', 'Ҧ' => 'ҧ', 'Ҩ' => 'ҩ', 'Ҫ' => 'ҫ', 'Ҭ' => 'ҭ', 'Ү' => 'ү',
		'Ұ' => 'ұ', 'Ҳ' => 'ҳ', 'Ҵ' => 'ҵ', 'Ҷ' => 'ҷ', 'Ҹ' => 'ҹ', 'Һ' => 'һ',
		'Ҽ' => 'ҽ', 'Ҿ' => 'ҿ', 'Ӂ' => 'ӂ', 'Ӄ' => 'ӄ', 'Ӈ' => 'ӈ', 'Ӌ' => 'ӌ',
		'Ӑ' => 'ӑ', 'Ӓ' => 'ӓ', 'Ӕ' => 'ӕ', 'Ӗ' => 'ӗ', 'Ә' => 'ә', 'Ӛ' => 'ӛ',
		'Ӝ' => 'ӝ', 'Ӟ' => 'ӟ', 'Ӡ' => 'ӡ', 'Ӣ' => 'ӣ', 'Ӥ' => 'ӥ', 'Ӧ' => 'ӧ',
		'Ө' => 'ө', 'Ӫ' => 'ӫ', 'Ӯ' => 'ӯ', 'Ӱ' => 'ӱ', 'Ӳ' => 'ӳ', 'Ӵ' => 'ӵ',
		'Ӹ' => 'ӹ', 'Ա' => 'ա', 'Բ' => 'բ', 'Գ' => 'գ', 'Դ' => 'դ', 'Ե' => 'ե',
		'Զ' => 'զ', 'Է' => 'է', 'Ը' => 'ը', 'Թ' => 'թ', 'Ժ' => 'ժ', 'Ի' => 'ի',
		'Լ' => 'լ', 'Խ' => 'խ', 'Ծ' => 'ծ', 'Կ' => 'կ', 'Հ' => 'հ', 'Ձ' => 'ձ',
		'Ղ' => 'ղ', 'Ճ' => 'ճ', 'Մ' => 'մ', 'Յ' => 'յ', 'Ն' => 'ն', 'Շ' => 'շ',
		'Ո' => 'ո', 'Չ' => 'չ', 'Պ' => 'պ', 'Ջ' => 'ջ', 'Ռ' => 'ռ', 'Ս' => 'ս',
		'Վ' => 'վ', 'Տ' => 'տ', 'Ր' => 'ր', 'Ց' => 'ց', 'Ւ' => 'ւ', 'Փ' => 'փ',
		'Ք' => 'ք', 'Օ' => 'օ', 'Ֆ' => 'ֆ', 'Ⴀ' => 'ა', 'Ⴁ' => 'ბ', 'Ⴂ' => 'გ',
		'Ⴃ' => 'დ', 'Ⴄ' => 'ე', 'Ⴅ' => 'ვ', 'Ⴆ' => 'ზ', 'Ⴇ' => 'თ', 'Ⴈ' => 'ი',
		'Ⴉ' => 'კ', 'Ⴊ' => 'ლ', 'Ⴋ' => 'მ', 'Ⴌ' => 'ნ', 'Ⴍ' => 'ო',  'Ⴎ' => 'პ',
		'Ⴏ' => 'ჟ', 'Ⴐ' => 'რ', 'Ⴑ' => 'ს', 'Ⴒ' => 'ტ', 'Ⴓ' => 'უ', 'Ⴔ' => 'ფ',
		'Ⴕ' => 'ქ', 'Ⴖ' => 'ღ', 'Ⴗ' => 'ყ', 'Ⴘ' => 'შ', 'Ⴙ' => 'ჩ', 'Ⴚ' => 'ც',
		'Ⴛ' => 'ძ', 'Ⴜ' => 'წ', 'Ⴝ' => 'ჭ', 'Ⴞ' => 'ხ', 'Ⴟ' => 'ჯ', 'Ⴠ' => 'ჰ',
		'Ⴡ' => 'ჱ', 'Ⴢ' => 'ჲ', 'Ⴣ' => 'ჳ', 'Ⴤ' => 'ჴ', 'Ⴥ' => 'ჵ', 'Ḁ' => 'ḁ',
		'Ḃ' => 'ḃ', 'Ḅ' => 'ḅ', 'Ḇ' => 'ḇ', 'Ḉ' => 'ḉ', 'Ḋ' => 'ḋ', 'Ḍ' => 'ḍ',
		'Ḏ' => 'ḏ', 'Ḑ' => 'ḑ', 'Ḓ' => 'ḓ', 'Ḕ' => 'ḕ', 'Ḗ' => 'ḗ', 'Ḙ' => 'ḙ',
		'Ḛ' => 'ḛ', 'Ḝ' => 'ḝ', 'Ḟ' => 'ḟ', 'Ḡ' => 'ḡ', 'Ḣ' => 'ḣ', 'Ḥ' => 'ḥ',
		'Ḧ' => 'ḧ', 'Ḩ' => 'ḩ', 'Ḫ' => 'ḫ', 'Ḭ' => 'ḭ', 'Ḯ' => 'ḯ', 'Ḱ' => 'ḱ',
		'Ḳ' => 'ḳ', 'Ḵ' => 'ḵ', 'Ḷ' => 'ḷ', 'Ḹ' => 'ḹ', 'Ḻ' => 'ḻ', 'Ḽ' => 'ḽ',
		'Ḿ' => 'ḿ', 'Ṁ' => 'ṁ', 'Ṃ' => 'ṃ', 'Ṅ' => 'ṅ', 'Ṇ' => 'ṇ', 'Ṉ' => 'ṉ',
		'Ṋ' => 'ṋ', 'Ṍ' => 'ṍ', 'Ṏ' => 'ṏ', 'Ṑ' => 'ṑ', 'Ṓ' => 'ṓ', 'Ṕ' => 'ṕ',
		'Ṗ' => 'ṗ', 'Ṙ' => 'ṙ', 'Ṛ' => 'ṛ', 'Ṝ' => 'ṝ', 'Ṟ' => 'ṟ', 'Ṡ' => 'ṡ',
		'Ṣ' => 'ṣ', 'Ṥ' => 'ṥ', 'Ṧ' => 'ṧ', 'Ṩ' => 'ṩ', 'Ṫ' => 'ṫ', 'Ṭ' => 'ṭ',
		'Ṯ' => 'ṯ', 'Ṱ' => 'ṱ', 'Ṳ' => 'ṳ', 'Ṵ' => 'ṵ', 'Ṷ' => 'ṷ', 'Ṹ' => 'ṹ',
		'Ṻ' => 'ṻ', 'Ṽ' => 'ṽ', 'Ṿ' => 'ṿ', 'Ẁ' => 'ẁ', 'Ẃ' => 'ẃ', 'Ẅ' => 'ẅ',
		'Ẇ' => 'ẇ', 'Ẉ' => 'ẉ', 'Ẋ' => 'ẋ', 'Ẍ' => 'ẍ', 'Ẏ' => 'ẏ', 'Ẑ' => 'ẑ',
		'Ẓ' => 'ẓ', 'Ẕ' => 'ẕ', 'Ạ' => 'ạ', 'Ả' => 'ả', 'Ấ' => 'ấ', 'Ầ' => 'ầ',
		'Ẩ' => 'ẩ', 'Ẫ' => 'ẫ', 'Ậ' => 'ậ', 'Ắ' => 'ắ', 'Ằ' => 'ằ', 'Ẳ' => 'ẳ',
		'Ẵ' => 'ẵ', 'Ặ' => 'ặ', 'Ẹ' => 'ẹ', 'Ẻ' => 'ẻ', 'Ẽ' => 'ẽ', 'Ế' => 'ế',
		'Ề' => 'ề', 'Ể' => 'ể', 'Ễ' => 'ễ', 'Ệ' => 'ệ', 'Ỉ' => 'ỉ', 'Ị' => 'ị',
		'Ọ' => 'ọ', 'Ỏ' => 'ỏ', 'Ố' => 'ố', 'Ồ' => 'ồ', 'Ổ' => 'ổ', 'Ỗ' => 'ỗ',
		'Ộ' => 'ộ', 'Ớ' => 'ớ', 'Ờ' => 'ờ', 'Ở' => 'ở', 'Ỡ' => 'ỡ', 'Ợ' => 'ợ',
		'Ụ' => 'ụ', 'Ủ' => 'ủ', 'Ứ' => 'ứ', 'Ừ' => 'ừ', 'Ử' => 'ử', 'Ữ' => 'ữ',
		'Ự' => 'ự', 'Ỳ' => 'ỳ', 'Ỵ' => 'ỵ', 'Ỷ' => 'ỷ', 'Ỹ' => 'ỹ', 'Ἀ' => 'ἀ',
		'Ἁ' => 'ἁ', 'Ἂ' => 'ἂ', 'Ἃ' => 'ἃ', 'Ἄ' => 'ἄ', 'Ἅ' => 'ἅ', 'Ἆ' => 'ἆ',
		'Ἇ' => 'ἇ', 'Ἐ' => 'ἐ', 'Ἑ' => 'ἑ', 'Ἒ' => 'ἒ', 'Ἓ' => 'ἓ', 'Ἔ' => 'ἔ',
		'Ἕ' => 'ἕ', 'Ἠ' => 'ἠ', 'Ἡ' => 'ἡ', 'Ἢ' => 'ἢ', 'Ἣ' => 'ἣ', 'Ἤ' => 'ἤ',
		'Ἥ' => 'ἥ', 'Ἦ' => 'ἦ', 'Ἧ' => 'ἧ', 'Ἰ' => 'ἰ', 'Ἱ' => 'ἱ', 'Ἲ' => 'ἲ',
		'Ἳ' => 'ἳ', 'Ἴ' => 'ἴ', 'Ἵ' => 'ἵ', 'Ἶ' => 'ἶ', 'Ἷ' => 'ἷ', 'Ὀ' => 'ὀ',
		'Ὁ' => 'ὁ', 'Ὂ' => 'ὂ', 'Ὃ' => 'ὃ', 'Ὄ' => 'ὄ', 'Ὅ' => 'ὅ', 'Ὑ' => 'ὑ',
		'Ὓ' => 'ὓ', 'Ὕ' => 'ὕ', 'Ὗ' => 'ὗ', 'Ὠ' => 'ὠ', 'Ὡ' => 'ὡ', 'Ὢ' => 'ὢ',
		'Ὣ' => 'ὣ', 'Ὤ' => 'ὤ', 'Ὥ' => 'ὥ', 'Ὦ' => 'ὦ', 'Ὧ' => 'ὧ', 'ᾈ' => 'ᾀ',
		'ᾉ' => 'ᾁ', 'ᾊ' => 'ᾂ', 'ᾋ' => 'ᾃ', 'ᾌ' => 'ᾄ', 'ᾍ' => 'ᾅ', 'ᾎ' => 'ᾆ',
		'ᾏ' => 'ᾇ', 'ᾘ' => 'ᾐ', 'ᾙ' => 'ᾑ', 'ᾚ' => 'ᾒ', 'ᾛ' => 'ᾓ', 'ᾜ' => 'ᾔ',
		'ᾝ' => 'ᾕ', 'ᾞ' => 'ᾖ', 'ᾟ' => 'ᾗ', 'ᾨ' => 'ᾠ', 'ᾩ' => 'ᾡ', 'ᾪ' => 'ᾢ',
		'ᾫ' => 'ᾣ', 'ᾬ' => 'ᾤ', 'ᾭ' => 'ᾥ', 'ᾮ' => 'ᾦ', 'ᾯ' => 'ᾧ', 'Ᾰ' => 'ᾰ',
		'Ᾱ' => 'ᾱ', 'Ῐ' => 'ῐ', 'Ῑ' => 'ῑ', 'Ῠ' => 'ῠ', 'Ῡ' => 'ῡ', 'Ⓐ' => 'ⓐ',
		'Ⓑ' => 'ⓑ', 'Ⓒ' => 'ⓒ', 'Ⓓ' => 'ⓓ', 'Ⓔ' => 'ⓔ', 'Ⓕ' => 'ⓕ', 'Ⓖ' => 'ⓖ',
		'Ⓗ' => 'ⓗ', 'Ⓘ' => 'ⓘ', 'Ⓙ' => 'ⓙ', 'Ⓚ' => 'ⓚ', 'Ⓛ' => 'ⓛ', 'Ⓜ' => 'ⓜ',
		'Ⓝ' => 'ⓝ', 'Ⓞ' => 'ⓞ', 'Ⓟ' => 'ⓟ', 'Ⓠ' => 'ⓠ', 'Ⓡ' => 'ⓡ', 'Ⓢ' => 'ⓢ',
		'Ⓣ' => 'ⓣ', 'Ⓤ' => 'ⓤ', 'Ⓥ' => 'ⓥ', 'Ⓦ' => 'ⓦ', 'Ⓧ' => 'ⓧ', 'Ⓨ' => 'ⓨ',
		'Ⓩ' => 'ⓩ', 'Ａ' => 'ａ', 'Ｂ' => 'ｂ', 'Ｃ' => 'ｃ', 'Ｄ' => 'ｄ', 'Ｅ' => 'ｅ',
		'Ｆ' => 'ｆ', 'Ｇ' => 'ｇ', 'Ｈ' => 'ｈ', 'Ｉ' => 'ｉ', 'Ｊ' => 'ｊ', 'Ｋ' => 'ｋ',
		'Ｌ' => 'ｌ', 'Ｍ' => 'ｍ', 'Ｎ' => 'ｎ', 'Ｏ' => 'ｏ', 'Ｐ' => 'ｐ', 'Ｑ' => 'ｑ',
		'Ｒ' => 'ｒ', 'Ｓ' => 'ｓ', 'Ｔ' => 'ｔ', 'Ｕ' => 'ｕ', 'Ｖ' => 'ｖ', 'Ｗ' => 'ｗ',
		'Ｘ' => 'ｘ', 'Ｙ' => 'ｙ', 'Ｚ' => 'ｚ'
	);
	
	/**
	 * A mapping of all ASCII-based latin characters, punctuation, symbols and number forms to ASCII.
	 * 
	 * Includes elements from the following Unicode blocks:
	 * 
	 *  - Latin-1 Supplement
	 *  - Latin Extended-A
	 *  - Latin Extended-B
	 *  - IPA Extensions
	 *  - Latin Extended Additional
	 *  - General Punctuation
	 *  - Letterlike symbols
	 *  - Number Forms
	 * 
	 * @var array
	 */
	static private $utf8_to_ascii = array(
		// Latin-1 Supplement
		'©' => '(c)', '«' => '<<',  '®' => '(R)', '»' => '>>',  '¼' => '1/4',
		'½' => '1/2', '¾' => '3/4', 'À' => 'A',   'Á' => 'A',   'Â' => 'A',
		'Ã' => 'A',   'Ä' => 'A',   'Å' => 'A',   'Æ' => 'AE',  'Ç' => 'C',
		'È' => 'E',   'É' => 'E',   'Ê' => 'E',   'Ë' => 'E',   'Ì' => 'I',
		'Í' => 'I',   'Î' => 'I',   'Ï' => 'I',   'Ñ' => 'N',   'Ò' => 'O',
		'Ó' => 'O',   'Ô' => 'O',   'Õ' => 'O',   'Ö' => 'O',   'Ø' => 'O',
		'Ù' => 'U',   'Ú' => 'U',   'Û' => 'U',   'Ü' => 'U',   'Ý' => 'Y',
		'à' => 'a',   'á' => 'a',   'â' => 'a',   'ã' => 'a',   'ä' => 'a',
		'å' => 'a',   'æ' => 'ae',  'ç' => 'c',   'è' => 'e',   'é' => 'e',
		'ê' => 'e',   'ë' => 'e',   'ì' => 'i',   'í' => 'i',   'î' => 'i',
		'ï' => 'i',   'ñ' => 'n',   'ò' => 'o',   'ó' => 'o',   'ô' => 'o',
		'õ' => 'o',   'ö' => 'o',   'ø' => 'o',   'ù' => 'u',   'ú' => 'u',
		'û' => 'u',   'ü' => 'u',   'ý' => 'y',   'ÿ' => 'y',
		// Latin Extended-A
		'Ā' => 'A',   'ā' => 'a',   'Ă' => 'A',   'ă' => 'a',   'Ą' => 'A',
		'ą' => 'a',   'Ć' => 'C',   'ć' => 'c',   'Ĉ' => 'C',   'ĉ' => 'c',
		'Ċ' => 'C',   'ċ' => 'c',   'Č' => 'C',   'č' => 'c',   'Ď' => 'D',
		'ď' => 'd',   'Đ' => 'D',   'đ' => 'd',   'Ē' => 'E',   'ē' => 'e',
		'Ĕ' => 'E',   'ĕ' => 'e',   'Ė' => 'E',   'ė' => 'e',   'Ę' => 'E',
		'ę' => 'e',   'Ě' => 'E',   'ě' => 'e',   'Ĝ' => 'G',   'ĝ' => 'g',
		'Ğ' => 'G',   'ğ' => 'g',   'Ġ' => 'G',   'ġ' => 'g',   'Ģ' => 'G',
		'ģ' => 'g',   'Ĥ' => 'H',   'ĥ' => 'h',   'Ħ' => 'H',   'ħ' => 'h',
		'Ĩ' => 'I',   'ĩ' => 'i',   'Ī' => 'I',   'ī' => 'i',   'Ĭ' => 'I',
		'ĭ' => 'i',   'Į' => 'I',   'į' => 'i',   'İ' => 'I',   'ı' => 'i',
		'Ĳ' => 'IJ',  'ĳ' => 'ij',  'Ĵ' => 'J',   'ĵ' => 'j',   'Ķ' => 'K',
		'ķ' => 'k',   'Ĺ' => 'L',   'ĺ' => 'l',   'Ļ' => 'L',   'ļ' => 'l',
		'Ľ' => 'L',   'ľ' => 'l',   'Ŀ' => 'L',   'ŀ' => 'l',   'Ł' => 'L',
		'ł' => 'l',   'Ń' => 'N',   'ń' => 'n',   'Ņ' => 'N',   'ņ' => 'n',
		'Ň' => 'N',   'ň' => 'n',   'ŉ' => "'n", 'Ŋ' => 'N',   'ŋ' => 'n',
		'Ō' => 'O',   'ō' => 'o',   'Ŏ' => 'O',   'ŏ' => 'o',   'Ő' => 'O',
		'ő' => 'o',   'Œ' => 'OE',  'œ' => 'oe',  'Ŕ' => 'R',   'ŕ' => 'r',
		'Ŗ' => 'R',   'ŗ' => 'r',   'Ř' => 'R',   'ř' => 'r',   'Ś' => 'S',
		'ś' => 's',   'Ŝ' => 'S',   'ŝ' => 's',   'Ş' => 'S',   'ş' => 's',
		'Š' => 'S',   'š' => 's',   'Ţ' => 'T',   'ţ' => 't',   'Ť' => 'T',
		'ť' => 't',   'Ŧ' => 'T',   'ŧ' => 't',   'Ũ' => 'U',   'ũ' => 'u',
		'Ū' => 'U',   'ū' => 'u',   'Ŭ' => 'U',   'ŭ' => 'u',   'Ů' => 'U',
		'ů' => 'u',   'Ű' => 'U',   'ű' => 'u',   'Ų' => 'U',   'ų' => 'u',
		'Ŵ' => 'W',   'ŵ' => 'w',   'Ŷ' => 'Y',   'ŷ' => 'y',   'Ÿ' => 'Y',
		'Ź' => 'Z',   'ź' => 'z',   'Ż' => 'Z',   'ż' => 'z',   'Ž' => 'Z',
		'ž' => 'z',
		// Latin Extended-B
		'ƀ' => 'b',   'Ɓ' => 'B',   'Ƃ' => 'B',   'ƃ' => 'b',   'Ɔ' => 'O',
		'Ƈ' => 'C',   'ƈ' => 'c',   'Ɖ' => 'D',   'Ɗ' => 'D',   'Ƌ' => 'D',
		'ƌ' => 'd',   'Ǝ' => 'E',   'Ɛ' => 'E',   'Ƒ' => 'F',   'ƒ' => 'f',
		'Ɠ' => 'G',   'Ɨ' => 'I',   'Ƙ' => 'K',   'ƙ' => 'k',   'ƚ' => 'l',
		'Ɯ' => 'M',   'Ɲ' => 'N',   'ƞ' => 'n',   'Ɵ' => 'O',   'Ơ' => 'O',
		'ơ' => 'o',   'Ƣ' => 'OI',  'ƣ' => 'oi',  'Ƥ' => 'P',   'ƥ' => 'p',
		'ƫ' => 't',   'Ƭ' => 'T',   'ƭ' => 't',   'Ʈ' => 'T',   'Ư' => 'U',
		'ư' => 'u',   'Ʋ' => 'V',   'Ƴ' => 'Y',   'ƴ' => 'y',   'Ƶ' => 'Z',
		'ƶ' => 'z',   'ƻ' => '2',   'Ǆ' => 'DZ',  'ǅ' => 'Dz',  'ǆ' => 'dz',
		'Ǉ' => 'LJ',  'ǈ' => 'Lj',  'ǉ' => 'lj',  'Ǌ' => 'Nj',  'ǋ' => 'Nj',
		'ǌ' => 'nj',  'Ǎ' => 'A',   'ǎ' => 'a',   'Ǐ' => 'I',   'ǐ' => 'i',
		'Ǒ' => 'O',   'ǒ' => 'o',   'Ǔ' => 'U',   'ǔ' => 'u',   'Ǖ' => 'U',
		'ǖ' => 'u',   'Ǘ' => 'U',   'ǘ' => 'u',   'Ǚ' => 'U',   'ǚ' => 'u',
		'Ǜ' => 'U',   'ǜ' => 'u',   'ǝ' => 'e',   'Ǟ' => 'A',   'ǟ' => 'a',
		'Ǡ' => 'A',   'ǡ' => 'a',   'Ǣ' => 'AE',  'ǣ' => 'ae',  'Ǥ' => 'G',
		'ǥ' => 'g',   'Ǧ' => 'G',   'ǧ' => 'g',   'Ǩ' => 'K',   'ǩ' => 'k',
		'Ǫ' => 'O',   'ǫ' => 'o',   'Ǭ' => 'O',   'ǭ' => 'o',   'ǰ' => 'j',
		'Ǳ' => 'DZ',  'ǲ' => 'Dz',  'ǳ' => 'dz',  'Ǵ' => 'G',   'ǵ' => 'g',
		'Ǹ' => 'N',   'ǹ' => 'n',   'Ǻ' => 'A',   'ǻ' => 'a',   'Ǽ' => 'AE',
		'ǽ' => 'ae',  'Ǿ' => 'O',   'ǿ' => 'o',   'Ȁ' => 'A',   'ȁ' => 'a',
		'Ȃ' => 'A',   'ȃ' => 'a',   'Ȅ' => 'E',   'ȅ' => 'e',   'Ȇ' => 'E',
		'ȇ' => 'e',   'Ȉ' => 'I',   'ȉ' => 'i',   'Ȋ' => 'I',   'ȋ' => 'i',
		'Ȍ' => 'O',   'ȍ' => 'o',   'Ȏ' => 'O',   'ȏ' => 'o',   'Ȑ' => 'R',
		'ȑ' => 'r',   'Ȓ' => 'R',   'ȓ' => 'r',   'Ȕ' => 'U',   'ȕ' => 'u',
		'Ȗ' => 'U',   'ȗ' => 'u',   'Ș' => 'S',   'ș' => 's',   'Ț' => 'T',
		'ț' => 't',   'Ȟ' => 'H',   'ȟ' => 'h',   'Ƞ' => 'N',   'ȡ' => 'd',
		'Ȥ' => 'Z',   'ȥ' => 'z',   'Ȧ' => 'A',   'ȧ' => 'a',   'Ȩ' => 'E',
		'ȩ' => 'e',   'Ȫ' => 'O',   'ȫ' => 'o',   'Ȭ' => 'O',   'ȭ' => 'o',
		'Ȯ' => 'O',   'ȯ' => 'o',   'Ȱ' => 'O',   'ȱ' => 'o',   'Ȳ' => 'Y',
		'ȳ' => 'y',   'ȴ' => 'l',   'ȵ' => 'n',   'ȶ' => 't',   'ȷ' => 'j',
		'ȸ' => 'db',  'ȹ' => 'qp',  'Ⱥ' => 'A',   'Ȼ' => 'C',   'ȼ' => 'c',
		'Ƚ' => 'L',   'Ⱦ' => 'T',   'ȿ' => 's',   'ɀ' => 'z',   'Ƀ' => 'B',
		'Ʉ' => 'U',   'Ʌ' => 'V',   'Ɇ' => 'E',   'ɇ' => 'e',   'Ɉ' => 'J',
		'ɉ' => 'j',   'Ɋ' => 'Q',   'ɋ' => 'q',   'Ɍ' => 'R',   'ɍ' => 'r',
		'Ɏ' => 'Y',   'ɏ' => 'y',
		// IPA Extensions
		'ɐ' => 'a',   'ɓ' => 'b',   'ɔ' => 'o',   'ɕ' => 'c',   'ɖ' => 'd',
		'ɗ' => 'd',   'ɘ' => 'e',   'ɛ' => 'e',   'ɜ' => 'e',   'ɝ' => 'e',
		'ɞ' => 'e',   'ɟ' => 'j',   'ɠ' => 'g',   'ɡ' => 'g',   'ɢ' => 'G',
		'ɥ' => 'h',   'ɦ' => 'h',   'ɨ' => 'i',   'ɪ' => 'I',   'ɫ' => 'l',
		'ɬ' => 'l',   'ɭ' => 'l',   'ɯ' => 'm',   'ɰ' => 'm',   'ɱ' => 'm',
		'ɲ' => 'n',   'ɳ' => 'n',   'ɴ' => 'N',   'ɵ' => 'o',   'ɶ' => 'OE',
		'ɹ' => 'r',   'ɺ' => 'r',   'ɻ' => 'r',   'ɼ' => 'r',   'ɽ' => 'r',
		'ɾ' => 'r',   'ɿ' => 'r',   'ʀ' => 'R',   'ʁ' => 'R',   'ʂ' => 's',
		'ʇ' => 't',   'ʈ' => 't',   'ʉ' => 'u',   'ʋ' => 'v',   'ʌ' => 'v',
		'ʍ' => 'w',   'ʎ' => 'y',   'ʏ' => 'Y',   'ʐ' => 'z',   'ʑ' => 'z',
		'ʗ' => 'C',   'ʙ' => 'B',   'ʚ' => 'e',   'ʛ' => 'G',   'ʜ' => 'H',
		'ʝ' => 'j',   'ʞ' => 'k',   'ʟ' => 'L',   'ʠ' => 'q',   'ʣ' => 'dz',
		'ʥ' => 'dz',  'ʦ' => 'ts',  'ʨ' => 'tc',  'ʪ' => 'ls',  'ʫ' => 'lz',
		'ʮ' => 'h',   'ʯ' => 'h',
		// Latin Extended Additional
		'Ḁ' => 'A',   'ḁ' => 'a',   'Ḃ' => 'B',   'ḃ' => 'b',   'Ḅ' => 'B',
		'ḅ' => 'b',   'Ḇ' => 'B',   'ḇ' => 'b',   'Ḉ' => 'C',   'ḉ' => 'c',
		'Ḋ' => 'D',   'ḋ' => 'd',   'Ḍ' => 'D',   'ḍ' => 'd',   'Ḏ' => 'D',
		'ḏ' => 'd',   'Ḑ' => 'D',   'ḑ' => 'd',   'Ḓ' => 'D',   'ḓ' => 'd',
		'Ḕ' => 'E',   'ḕ' => 'e',   'Ḗ' => 'E',   'ḗ' => 'e',   'Ḙ' => 'E',
		'ḙ' => 'e',   'Ḛ' => 'E',   'ḛ' => 'e',   'Ḝ' => 'E',   'ḝ' => 'e',
		'Ḟ' => 'F',   'ḟ' => 'f',   'Ḡ' => 'G',   'ḡ' => 'g',   'Ḣ' => 'H',
		'ḣ' => 'h',   'Ḥ' => 'H',   'ḥ' => 'h',   'Ḧ' => 'H',   'ḧ' => 'h',
		'Ḩ' => 'H',   'ḩ' => 'h',   'Ḫ' => 'H',   'ḫ' => 'h',   'Ḭ' => 'I',
		'ḭ' => 'i',   'Ḯ' => 'I',   'ḯ' => 'i',   'Ḱ' => 'K',   'ḱ' => 'k',
		'Ḳ' => 'K',   'ḳ' => 'k',   'Ḵ' => 'K',   'ḵ' => 'k',   'Ḷ' => 'L',
		'ḷ' => 'l',   'Ḹ' => 'L',   'ḹ' => 'l',   'Ḻ' => 'L',   'ḻ' => 'l',
		'Ḽ' => 'L',   'ḽ' => 'l',   'Ḿ' => 'M',   'ḿ' => 'm',   'Ṁ' => 'M',
		'ṁ' => 'm',   'Ṃ' => 'M',   'ṃ' => 'm',   'Ṅ' => 'N',   'ṅ' => 'n',
		'Ṇ' => 'N',   'ṇ' => 'n',   'Ṉ' => 'N',   'ṉ' => 'n',   'Ṋ' => 'N',
		'ṋ' => 'n',   'Ṍ' => 'O',   'ṍ' => 'o',   'Ṏ' => 'O',   'ṏ' => 'o',
		'Ṑ' => 'O',   'ṑ' => 'o',   'Ṓ' => 'O',   'ṓ' => 'o',   'Ṕ' => 'P',
		'ṕ' => 'p',   'Ṗ' => 'P',   'ṗ' => 'p',   'Ṙ' => 'R',   'ṙ' => 'r',
		'Ṛ' => 'R',   'ṛ' => 'r',   'Ṝ' => 'R',   'ṝ' => 'r',   'Ṟ' => 'R',
		'ṟ' => 'r',   'Ṡ' => 'S',   'ṡ' => 's',   'Ṣ' => 'S',   'ṣ' => 's',
		'Ṥ' => 'S',   'ṥ' => 's',   'Ṧ' => 'S',   'ṧ' => 's',   'Ṩ' => 'S',
		'ṩ' => 's',   'Ṫ' => 'T',   'ṫ' => 't',   'Ṭ' => 'T',   'ṭ' => 't',
		'Ṯ' => 'T',   'ṯ' => 't',   'Ṱ' => 'T',   'ṱ' => 't',   'Ṳ' => 'U',
		'ṳ' => 'u',   'Ṵ' => 'U',   'ṵ' => 'u',   'Ṷ' => 'U',   'ṷ' => 'u',
		'Ṹ' => 'U',   'ṹ' => 'u',   'Ṻ' => 'U',   'ṻ' => 'u',   'Ṽ' => 'V',
		'ṽ' => 'v',   'Ṿ' => 'V',   'ṿ' => 'v',   'Ẁ' => 'W',   'ẁ' => 'w',
		'Ẃ' => 'W',   'ẃ' => 'w',   'Ẅ' => 'W',   'ẅ' => 'w',   'Ẇ' => 'W',
		'ẇ' => 'w',   'Ẉ' => 'W',   'ẉ' => 'w',   'Ẋ' => 'X',   'ẋ' => 'x',
		'Ẍ' => 'X',   'ẍ' => 'x',   'Ẏ' => 'Y',   'ẏ' => 'y',   'Ẑ' => 'Z',
		'ẑ' => 'z',   'Ẓ' => 'Z',   'ẓ' => 'z',   'Ẕ' => 'Z',   'ẕ' => 'z',
		'ẖ' => 'h',   'ẗ' => 't',   'ẘ' => 'w',   'ẙ' => 'y',   'ẚ' => 'a',
		'Ạ' => 'A',   'ạ' => 'a',   'Ả' => 'A',   'ả' => 'a',   'Ấ' => 'A',
		'ấ' => 'a',   'Ầ' => 'A',   'ầ' => 'a',   'Ẩ' => 'A',   'ẩ' => 'a',
		'Ẫ' => 'A',   'ẫ' => 'a',   'Ậ' => 'A',   'ậ' => 'a',   'Ắ' => 'A',
		'ắ' => 'a',   'Ằ' => 'A',   'ằ' => 'a',   'Ẳ' => 'A',   'ẳ' => 'a',
		'Ẵ' => 'A',   'ẵ' => 'a',   'Ặ' => 'A',   'ặ' => 'a',   'Ẹ' => 'E',
		'ẹ' => 'e',   'Ẻ' => 'E',   'ẻ' => 'e',   'Ẽ' => 'E',   'ẽ' => 'e',
		'Ế' => 'E',   'ế' => 'e',   'Ề' => 'E',   'ề' => 'e',   'Ể' => 'E',
		'ể' => 'e',   'Ễ' => 'E',   'ễ' => 'e',   'Ệ' => 'E',   'ệ' => 'e',
		'Ỉ' => 'I',   'ỉ' => 'i',   'Ị' => 'I',   'ị' => 'i',   'Ọ' => 'O',
		'ọ' => 'o',   'Ỏ' => 'O',   'ỏ' => 'o',   'Ố' => 'O',   'ố' => 'o',
		'Ồ' => 'O',   'ồ' => 'o',   'Ổ' => 'O',   'ổ' => 'o',   'Ỗ' => 'O',
		'ỗ' => 'o',   'Ộ' => 'O',   'ộ' => 'o',   'Ớ' => 'O',   'ớ' => 'o',
		'Ờ' => 'O',   'ờ' => 'o',   'Ở' => 'O',   'ở' => 'o',   'Ỡ' => 'O',
		'ỡ' => 'o',   'Ợ' => 'O',   'ợ' => 'o',   'Ụ' => 'U',   'ụ' => 'u',
		'Ủ' => 'U',   'ủ' => 'u',   'Ứ' => 'U',   'ứ' => 'u',   'Ừ' => 'U',
		'ừ' => 'u',   'Ử' => 'U',   'ử' => 'u',   'Ữ' => 'U',   'ữ' => 'u',
		'Ự' => 'U',   'ự' => 'u',   'Ỳ' => 'Y',   'ỳ' => 'y',   'Ỵ' => 'Y',
		'ỵ' => 'y',   'Ỷ' => 'Y',   'ỷ' => 'y',   'Ỹ' => 'Y',   'ỹ' => 'y',
		// General Punctuation
		' ' => ' ',   ' ' => ' ',   ' ' => ' ',   ' ' => ' ',   ' ' => ' ',
		' ' => ' ',   ' ' => ' ',   ' ' => ' ',   ' ' => ' ',   ' ' => ' ',
		' ' => ' ',   '​' => '',    '‌' => '',    '‍' => '',    '‐' => '-',
		'‑' => '-',   '‒' => '-',   '–' => '-',   '—' => '-',   '―' => '-',
		'‖' => '||',  '‘' => "'",   '’' => "'",   '‚' => ',',   '‛' => "'",
		'“' => '"',   '”' => '"',   '‟' => '"',   '․' => '.',   '‥' => '..',
		'…' => '...', ' ' => ' ',   '′' => "'",   '″' => '"',   '‴' => '\'"',
		'‵' => "'",   '‶' => '"',   '‷' => '"\'', '‹' => '<',   '›' => '>',
		'‼' => '!!',  '‽' => '?!',  '⁄' => '/',   '⁇' => '?/',  '⁈' => '?!',
		'⁉' => '!?',
		// Letterlike Symbols
		'℠' => 'SM',  '™' => 'TM',
		// Number Forms
		'⅓' => '1/3', '⅔' => '2/3', '⅕' => '1/5', '⅖' => '2/5', '⅗' => '3/5',
		'⅘' => '4/5', '⅙' => '1/6', '⅚' => '5/6', '⅛' => '1/8', '⅜' => '3/8',
		'⅝' => '5/8', '⅞' => '7/8', 'Ⅰ' => 'I',   'Ⅱ' => 'II',  'Ⅲ' => 'III',
		'Ⅳ' => 'IV',  'Ⅴ' => 'V',   'Ⅵ' => 'Vi',  'Ⅶ' => 'VII', 'Ⅷ' => 'VIII',
		'Ⅸ' => 'IX',  'Ⅹ' => 'X',   'Ⅺ' => 'XI',  'Ⅻ' => 'XII', 'Ⅼ' => 'L',
		'Ⅽ' => 'C',   'Ⅾ' => 'D',   'Ⅿ' => 'M',   'ⅰ' => 'i',   'ⅱ' => 'ii',
		'ⅲ' => 'iii', 'ⅳ' => 'iv',  'ⅴ' => 'v',   'ⅵ' => 'vi',  'ⅶ' => 'vii',
		'ⅷ' => 'viii','ⅸ' => 'ix',  'ⅹ' => 'x',   'ⅺ' => 'xi',  'ⅻ' => 'xii',
		'ⅼ' => 'l',   'ⅽ' => 'c',   'ⅾ' => 'd',   'ⅿ' => 'm'
	);
	
	/**
	 * If the [http://php.net/mbstring mbstring] extension is available
	 * 
	 * Stays `NULL` until ::checkMbString() stores the result of
	 * `extension_loaded('mbstring')`; ::reset() sets it back to `NULL`.
	 * 
	 * @var boolean
	 */
	static private $mbstring_available = NULL;
	
	
	/**
	 * Maps UTF-8 ASCII-based latin characters, punctuation, symbols and number forms to ASCII
	 * 
	 * Any characters or symbols that can not be translated will be removed.
	 * 
	 * This function is most useful for situations that only allow ASCII, such
	 * as in URLs.
	 * 
	 * Translates elements from the following unicode blocks:
	 * 
	 *  - Latin-1 Supplement
	 *  - Latin Extended-A
	 *  - Latin Extended-B
	 *  - IPA Extensions
	 *  - Latin Extended Additional
	 *  - General Punctuation
	 *  - Letterlike symbols
	 *  - Number Forms
	 * 
	 * @internal
	 * 
	 * @param  string $string  The string to convert
	 * @return string  The input string in pure ASCII
	 */
	static public function ascii($string)
	{
		// Only strings with at least one non-ASCII byte need any work
		if (self::detect($string)) {
			// Swap every mapped character for its ASCII stand-in, then drop
			// whatever non-ASCII bytes the map did not cover
			$translated = strtr($string, self::$utf8_to_ascii);
			return preg_replace('#[^\x00-\x7F]#', '', $translated);
		}
		
		return $string;
	}
	
	
	/**
	 * Checks to see if the [http://php.net/mbstring mbstring] extension is available
	 * 
	 * @return void
	 */
	static private function checkMbString()
	{
		// Remember the result so callers only have to test the static property
		$is_loaded = extension_loaded('mbstring');
		self::$mbstring_available = $is_loaded;
	}
	
	
	/**
	 * Converts a unicode value into a UTF-8 character
	 * 
	 * @param  mixed $unicode_code_point  The character to create, either the `U+hex` or decimal code point
	 * @return string  The UTF-8 character
	 */
	static public function chr($unicode_code_point)
	{
		$code_point = $unicode_code_point;
		
		// The `U+hex` notation is converted to the decimal code point first
		if (is_string($code_point) && substr($code_point, 0, 2) == 'U+') {
			$code_point = hexdec(substr($code_point, 2));
		}
		
		// UTF-8 can only encode U+0000 through U+10FFFF, and the UTF-16
		// surrogate range U+D800 through U+DFFF is not allowed either
		$valid = is_numeric($code_point)
			&& $code_point >= 0
			&& $code_point <= 0x10FFFF
			&& ($code_point < 0xD800 || $code_point > 0xDFFF);
		
		if (!$valid) {
			throw new fProgrammerException(
				'The code point specified, %s, is invalid.',
				$code_point
			);
		}
		
		$code_point = (int) $code_point;
		
		// One byte characters: 0xxxxxxx
		if ($code_point < 0x80) {
			return chr($code_point);
		}
		
		// Two byte characters: 110xxxxx 10xxxxxx
		if ($code_point < 0x800) {
			return chr(0xC0 | ($code_point >> 6)) .
				chr(0x80 | ($code_point & 0x3F));
		}
		
		// Three byte characters: 1110xxxx 10xxxxxx 10xxxxxx
		if ($code_point < 0x10000) {
			return chr(0xE0 | ($code_point >> 12)) .
				chr(0x80 | (($code_point >> 6) & 0x3F)) .
				chr(0x80 | ($code_point & 0x3F));
		}
		
		// Four byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		return chr(0xF0 | ($code_point >> 18)) .
			chr(0x80 | (($code_point >> 12) & 0x3F)) .
			chr(0x80 | (($code_point >> 6) & 0x3F)) .
			chr(0x80 | ($code_point & 0x3F));
	}
	
	
	/**
	 * Removes any invalid UTF-8 characters from a string or array of strings
	 * 
	 * @param  array|string $value  The string or array of strings to clean
	 * @return array|string  The cleaned string, or the array of cleaned strings
	 */
	static public function clean($value)
	{
		// Arrays are cleaned one element at a time, recursing into nested arrays
		if (is_array($value)) {
			foreach ($value as $key => $element) {
				$value[$key] = self::clean($element);
			}
			return $value;
		}
		
		// Work out once whether this iconv implementation can be given //IGNORE
		if (self::$can_ignore_invalid === NULL) {
			$implementation = strtolower(ICONV_IMPL);
			self::$can_ignore_invalid = !in_array($implementation, array('unknown', 'ibm iconv'));
		}
		
		$out_charset = 'UTF-8';
		if (self::$can_ignore_invalid) {
			$out_charset .= '//IGNORE';
		}
		
		// The conversion is wrapped in fCore's E_NOTICE error capture -
		// presumably so notices about invalid input are not shown
		fCore::startErrorCapture(E_NOTICE);
		$cleaned = self::iconv('UTF-8', $out_charset, (string) $value);
		fCore::stopErrorCapture();
		
		return $cleaned;
	}
	
	
	/**
	 * Compares strings, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param  string $str1  The first string to compare
	 * @param  string $str2  The second string to compare
	 * @return integer  < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
	 */
	static public function cmp($str1, $str2)
	{
		// Primary ordering comes from the ASCII transliterations
		$order = strcmp(
			strtr($str1, self::$utf8_to_ascii),
			strtr($str2, self::$utf8_to_ascii)
		);
		
		if ($order !== 0) {
			return $order;
		}
		
		// Identical transliterations are ordered by the raw UTF-8 bytes
		return strcmp($str1, $str2);
	}
	
	
	/**
	 * Converts an offset in characters to an offset in bytes so that we can use the built-in functions for some operations
	 * 
	 * @param  string  $string  The string to base the offset on
	 * @param  integer $offset  The character offset to convert to bytes
	 * @return integer  The converted offset
	 */
	static private function convertOffsetToBytes($string, $offset)
	{
		if ($offset == 0) {
			return 0;
		}
		
		$len   = strlen($string);
		$chars = 0;
		
		// Positive offsets: walk forward and return the byte index at which
		// character number $offset starts. Stopping only when the next
		// character begins ensures that the continuation bytes of the
		// preceding multi-byte character are included in the result.
		if ($offset > 0) {
			for ($i=0; $i<$len; $i++) {
				// Bytes of the form 10xxxxxx continue a character
				if ((ord($string[$i]) & 0xC0) == 0x80) {
					continue;
				}
				if ($chars >= $offset) {
					return $i;
				}
				++$chars;
			}
			
			// The offset is at, or past, the end of the string
			return $len;
		}
		
		// Negative offsets: walk backward, counting a character each time the
		// byte that starts it is reached, and return the (negative) number of
		// bytes that were crossed
		$target = abs($offset);
		for ($i=$len-1; $i>=0; $i--) {
			if ((ord($string[$i]) & 0xC0) == 0x80) {
				continue;
			}
			if (++$chars >= $target) {
				return $i - $len;
			}
		}
		
		return -$len;
	}
	
	
	/**
	 * Detects if a UTF-8 string contains any non-ASCII characters
	 * 
	 * @param  string $string  The string to check
	 * @return boolean  If the string contains any non-ASCII characters
	 */
	static private function detect($string)
	{
		// preg_match() only gives 1 when a byte outside of 0x00-0x7F is found
		$found = preg_match('#[^\x00-\x7F]#', $string);
		return $found === 1;
	}
	
	
	/**
	 * Explodes a string on a delimiter
	 * 
	 * If no delimiter is provided, the string will be exploded with each
	 * characters being an element in the array.
	 * 
	 * @param  string  $string     The string to explode
	 * @param  string  $delimiter  The string to explode on. If `NULL` or `''` this method will return one character per array index.
	 * @return array  The exploded string
	 */
	static public function explode($string, $delimiter=NULL)
	{
		// Empty values mean "no delimiter", except numeric ones such as '0'
		// which are legitimate delimiters
		$has_delimiter = $delimiter || is_numeric($delimiter);
		
		if (!$has_delimiter) {
			// One array element per character; an empty string gives a single
			// empty element
			preg_match_all('#.|^\z#us', $string, $characters);
			return $characters[0];
		}
		
		return explode($delimiter, $string);
	}


	/**
	 * This works around a bug in MAMP 1.9.4+ and PHP 5.3 where iconv()
	 * does not seem to properly assign the return value to a variable, but
	 * does work when returning the value.
	 *
	 * @param string $in_charset   The incoming character encoding
	 * @param string $out_charset  The outgoing character encoding
	 * @param string $string       The string to convert
	 * @return string  The converted string
	 */
	static private function iconv($in_charset, $out_charset, $string)
	{
		// The result is returned directly, without an intermediate assignment,
		// because the assignment is the part that reportedly fails on the
		// affected installs
		return iconv($in_charset, $out_charset, $string);
	}
	
	
	/**
	 * Compares strings in a case-insensitive manner, with the resulting order having characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param  string $str1  The first string to compare
	 * @param  string $str2  The second string to compare
	 * @return integer  < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
	 */
	static public function icmp($str1, $str2)
	{
		// Case is ignored by comparing the lowercase forms of both strings
		return self::cmp(
			self::lower($str1),
			self::lower($str2)
		);
	}
	
	
	/**
	 * Compares strings using a natural order algorithm in a case-insensitive manner, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param  string $str1  The first string to compare
	 * @param  string $str2  The second string to compare
	 * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
	 */
	static public function inatcmp($str1, $str2)
	{
		// Case is ignored by comparing the lowercase forms of both strings
		return self::natcmp(
			self::lower($str1),
			self::lower($str2)
		);
	}
	
	
	/**
	 * Finds the first position (in characters) of the search value in the string - case is ignored when performing a match
	 * 
	 * @param  string  $haystack  The string to search in
	 * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
	 * @param  integer $offset    The character position to start searching from
	 * @return mixed  The integer character position of the first occurence of the needle or `FALSE` if no match
	 */
	static public function ipos($haystack, $needle, $offset=0)
	{
		// Haystacks with non-ASCII bytes need a UTF-8 aware search
		if (self::detect($haystack)) {
			if (self::$mbstring_available === NULL) {
				self::checkMbString();
			}
			
			$use_mbstring = self::$mbstring_available && function_exists('mb_stripos');
			if ($use_mbstring) {
				return mb_stripos($haystack, $needle, $offset, 'UTF-8');
			}
			
			// Without mbstring, lowercase both sides and do a case-sensitive search
			return self::pos(self::lower($haystack), self::lower($needle), $offset);
		}
		
		// Pure ASCII haystacks can use the built-in function
		return stripos($haystack, $needle, $offset);
	}
	
	
	/**
	 * Replaces matching parts of the string, with matches being done in a case-insensitive manner
	 * 
	 * If `$search` and `$replace` are both arrays and `$replace` is shorter,
	 * the extra `$search` string will be replaced with an empty string. If
	 * `$search` is an array and `$replace` is a string, all `$search` values
	 * will be replaced with the string specified.
	 * 
	 * @param  string $string   The string to perform the replacements on
	 * @param  mixed  $search   The string (or array of strings) to search for - see method description for details
	 * @param  mixed  $replace  The string (or array of strings) to replace with - see method description for details
	 * @return string  The input string with the specified replacements
	 */
	static public function ireplace($string, $search, $replace)
	{
		// Every search value becomes a literal, case-insensitive UTF-8 pattern
		if (is_array($search)) {
			foreach ($search as $key => $needle) {
				$search[$key] = '#' . preg_quote($needle, '#') . '#ui';
			}
		} else {
			$search = '#' . preg_quote($search, '#') . '#ui';
		}
		
		// Backslashes and dollar signs are escaped so preg_replace() does not
		// treat them as back references. This has to be done per element when
		// an array of replacements is passed, since strtr() only takes strings.
		$escapes = array('\\' => '\\\\', '$' => '\\$');
		if (is_array($replace)) {
			foreach ($replace as $key => $replacement) {
				$replace[$key] = strtr($replacement, $escapes);
			}
		} else {
			$replace = strtr($replace, $escapes);
		}
		
		return preg_replace($search, $replace, $string);
	}
	
	
	/**
	 * Finds the last position (in characters) of the search value in the string - case is ignored when performing a match
	 * 
	 * @param  string  $haystack  The string to search in
	 * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
	 * @param  integer $offset    The character position to start searching from. A negative value will stop looking that many characters from the end of the string
	 * @return mixed  The integer character position of the last occurence of the needle or `FALSE` if no match
	 */
	static public function irpos($haystack, $needle, $offset=0)
	{
		// Haystacks with non-ASCII bytes need a UTF-8 aware search
		if (self::detect($haystack)) {
			if (self::$mbstring_available === NULL) {
				self::checkMbString();
			}
			
			$use_mbstring = self::$mbstring_available && function_exists('mb_strripos');
			if ($use_mbstring) {
				return mb_strripos($haystack, $needle, $offset, 'UTF-8');
			}
			
			// Without mbstring, lowercase both sides and do a case-sensitive search
			return self::rpos(self::lower($haystack), self::lower($needle), $offset);
		}
		
		// Pure ASCII haystacks can use the built-in function
		return strripos($haystack, $needle, $offset);
	}
	
	
	/**
	 * Matches a string needle in the string haystack, returning a substring from the beginning of the needle to the end of the haystack
	 * 
	 * Can optionally return the part of the haystack before the needle. Matching
	 * is done in a case-insensitive manner.
	 * 
	 * @param  string  $haystack       The string to search in
	 * @param  string  $needle         The string to search for. This match will be done in a case-insensitive manner.
	 * @param  boolean $before_needle  If a substring of the haystack before the needle should be returned instead of the substring from the needle to the end of the haystack
	 * @return mixed  The specified part of the haystack, or `FALSE` if the needle was not found
	 */
	static public function istr($haystack, $needle, $before_needle=FALSE)
	{
		// We get better performance falling back for ASCII strings
		if ($before_needle == FALSE && !self::detect($haystack)) {
			return stristr($haystack, $needle);
		}
		
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		if (self::$mbstring_available && function_exists('mb_stristr')) {
			return mb_stristr($haystack, $needle, $before_needle, 'UTF-8');
		}
		
		// The needle is located in lowercase copies, and the resulting byte
		// position is then applied to the original haystack
		$pos = strpos(self::lower($haystack), self::lower($needle));
		
		// Without this check a missing needle would fall through to substr()
		// and give back an empty string or the whole haystack
		if ($pos === FALSE) {
			return FALSE;
		}
		
		if ($before_needle) {
			return substr($haystack, 0, $pos);
		}
		
		return substr($haystack, $pos);
	}
	
	
	/**
	 * Determines the length (in characters) of a string
	 * 
	 * @param  string $string  The string to measure
	 * @return integer  The number of characters in the string
	 */
	static public function len($string)
	{
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		if (self::$mbstring_available) {
			return mb_strlen($string, 'UTF-8');
		}
		
		// Every UTF-8 character contains exactly one byte that is not a
		// 10xxxxxx continuation byte, so the bytes that remain once the
		// continuation bytes are stripped equal the number of characters.
		// This replaces utf8_decode(), which is deprecated as of PHP 8.2.
		return strlen(preg_replace('#[\x80-\xBF]+#', '', (string) $string));
	}
	
	
	/**
	 * Converts all uppercase characters to lowercase
	 * 
	 * @param  string $string  The string to convert
	 * @return string  The input string with all uppercase characters in lowercase
	 */
	static public function lower($string)
	{
		// Strings with non-ASCII bytes need UTF-8 aware lowercasing
		if (self::detect($string)) {
			if (self::$mbstring_available === NULL) {
				self::checkMbString();
			}
			
			// Without mbstring the class' own translation table is used
			if (!self::$mbstring_available) {
				return strtr($string, self::$upper_to_lower);
			}
			
			// mb_strtolower() misses some characters, so a fix-up table is
			// applied to its output
			$lowered = mb_strtolower($string, 'utf-8');
			return strtr($lowered, self::$mb_upper_to_lower_fix);
		}
		
		// Pure ASCII strings can use the built-in function
		return strtolower($string);
	}
	
	
	/**
	 * Trims whitespace, or any specified characters, from the beginning of a string
	 * 
	 * @param  string $string    The string to trim
	 * @param  string $charlist  The characters to trim
	 * @return string  The trimmed string
	 */
	static public function ltrim($string, $charlist=NULL)
	{
		// No character list means the default whitespace trimming. The cast
		// keeps a NULL $charlist from being passed to strlen().
		if (strlen((string) $charlist) === 0) {
			return ltrim($string);
		}
		
		$search = preg_quote($charlist, '#');
		// A literal hyphen has to be escaped exactly once inside the character
		// class. preg_quote() already escapes it as of PHP 5.3, so that escape
		// is removed before ours is added - otherwise the result is "\\-",
		// an escaped backslash followed by a bare hyphen.
		$search = str_replace('\\-', '-', $search);
		$search = str_replace('-', '\\-', $search);
		// A ".." in the character list indicates a range
		$search = str_replace('\.\.', '-', $search);
		return preg_replace('#^[' . $search . ']+#Du', '', $string);
	}
	
	
	/**
	 * Compares strings using a natural order algorithm, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
	 * 
	 * Please note that this function sorts based on English language sorting
	 * rules only. Locale-specific sorting is done by
	 * [http://php.net/strcoll strcoll()], however there are technical
	 * limitations.
	 * 
	 * @param  string $str1  The first string to compare
	 * @param  string $str2  The second string to compare
	 * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
	 */
	static public function natcmp($str1, $str2)
	{
		// Primary ordering comes from the ASCII transliterations
		$order = strnatcmp(
			strtr($str1, self::$utf8_to_ascii),
			strtr($str2, self::$utf8_to_ascii)
		);
		
		if ($order !== 0) {
			return $order;
		}
		
		// Identical transliterations are ordered by the raw UTF-8 strings
		return strnatcmp($str1, $str2);
	}
	
	
	/**
	 * Converts a UTF-8 character to a unicode code point
	 * 
	 * @param  string $character  The character to decode
	 * @return string  The U+hex unicode code point for the character
	 */
	static public function ord($character)
	{
		// For each byte length: the lowest and highest allowed lead byte, the
		// mask that extracts the payload bits from the lead byte, and the
		// smallest code point that is allowed to use that many bytes
		static $specs = array(
			1 => array(0x00, 0x7F, 0x7F, 0x0000),
			2 => array(0xC2, 0xDF, 0x1F, 0x0080),
			3 => array(0xE0, 0xEF, 0x0F, 0x0800),
			4 => array(0xF0, 0xF4, 0x07, 0x10000)
		);
		
		$character  = (string) $character;
		$length     = strlen($character);
		$code_point = NULL;
		
		if (isset($specs[$length])) {
			list($min_lead, $max_lead, $mask, $min_code_point) = $specs[$length];
			$lead = ord($character[0]);
			
			if ($lead >= $min_lead && $lead <= $max_lead) {
				$code_point = $lead & $mask;
				
				// Every following byte must be 10xxxxxx and adds six bits
				for ($i=1; $i<$length; $i++) {
					$byte = ord($character[$i]);
					if (($byte & 0xC0) != 0x80) {
						$code_point = NULL;
						break;
					}
					$code_point = ($code_point << 6) | ($byte & 0x3F);
				}
			}
			
			// Overlong encodings, UTF-16 surrogates and values past U+10FFFF
			// are not valid UTF-8 even though their bytes look well-formed
			if ($code_point !== NULL) {
				$is_overlong  = $code_point < $min_code_point;
				$is_surrogate = $code_point >= 0xD800 && $code_point <= 0xDFFF;
				if ($is_overlong || $is_surrogate || $code_point > 0x10FFFF) {
					$code_point = NULL;
				}
			}
		}
		
		if ($code_point === NULL) {
			throw new fProgrammerException(
				'The UTF-8 character specified is invalid'
			);
		}
		
		$hex = strtoupper(dechex($code_point));
		return 'U+' . str_pad($hex, 4, '0', STR_PAD_LEFT);
	}
	
	
	/**
	 * Pads a string to the number of characters specified
	 * 
	 * @param  string  $string      The string to pad
	 * @param  integer $pad_length  The character length to pad the string to
	 * @param  string  $pad_string  The string to pad the source string with
	 * @param  string  $pad_type    The type of padding to do: `'left'`, `'right'`, `'both'`
	 * @return string  The input string padded to the specified character width
	 */
	static public function pad($string, $pad_length, $pad_string=' ', $pad_type='right')
	{
		$valid_pad_types = array('right', 'left', 'both');
		if (!in_array($pad_type, $valid_pad_types)) {
			throw new fProgrammerException(
				'The pad type specified, %1$s, is not valid. Must be one of: %2$s.',
				$pad_type,
				join(', ', $valid_pad_types)
			);
		}
		
		// We get better performance falling back for ASCII strings
		if (!self::detect($string) && !self::detect($pad_string)) {
			static $type_map = array(
				'left'  => STR_PAD_LEFT,
				'right' => STR_PAD_RIGHT,
				'both'  => STR_PAD_BOTH
			);
			return str_pad($string, $pad_length, $pad_string, $type_map[$pad_type]);
		}
		
		$pad_to_length = $pad_length - self::len($string);
		
		if ($pad_to_length < 1) {
			return $string;
		}
		
		// An empty pad string can never fill the requested width
		$pad_string_length = self::len($pad_string);
		if ($pad_string_length < 1) {
			throw new fProgrammerException(
				'The pad string specified is empty, so the string can not be padded'
			);
		}
		
		// The padding is divided the same way str_pad() does it in the ASCII
		// branch above: for 'both' the left side gets the smaller half
		switch ($pad_type) {
			case 'left':
				$side_lengths = array('left' => $pad_to_length, 'right' => 0);
				break;
				
			case 'both':
				$left_length  = (int) floor($pad_to_length / 2);
				$side_lengths = array('left' => $left_length, 'right' => $pad_to_length - $left_length);
				break;
				
			default:
				$side_lengths = array('left' => 0, 'right' => $pad_to_length);
				break;
		}
		
		$padding = array('left' => '', 'right' => '');
		foreach ($side_lengths as $side => $side_length) {
			if ($side_length < 1) {
				continue;
			}
			// Repeat the pad string until it is long enough, then trim the
			// excess characters
			$repeats        = (int) ceil($side_length / $pad_string_length);
			$padding[$side] = self::sub(str_repeat($pad_string, $repeats), 0, $side_length);
		}
		
		return $padding['left'] . $string . $padding['right'];
	}
	
	
	/**
	 * Finds the first position (in characters) of the search value in the string
	 * 
	 * @param  string  $haystack  The string to search in
	 * @param  string  $needle    The string to search for
	 * @param  integer $offset    The character position to start searching from
	 * @return mixed  The integer character position of the first occurence of the needle or `FALSE` if no match
	 */
	static public function pos($haystack, $needle, $offset=0)
	{
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		if (!self::$mbstring_available) {
			// Without mbstring the search is done on bytes: the character
			// offset is converted to bytes going in, and the byte position of
			// the match is converted back to characters coming out
			$byte_offset   = self::convertOffsetToBytes($haystack, $offset);
			$byte_position = strpos($haystack, $needle, $byte_offset);
			
			if ($byte_position === FALSE) {
				return FALSE;
			}
			
			return self::len(substr($haystack, 0, $byte_position));
		}
		
		return mb_strpos($haystack, $needle, $offset, 'UTF-8');
	}
	
	
	/**
	 * Replaces matching parts of the string
	 * 
	 * If `$search` and `$replace` are both arrays and `$replace` is shorter,
	 * the extra `$search` string will be replaced with an empty string. If
	 * `$search` is an array and `$replace` is a string, all `$search` values
	 * will be replaced with the string specified.
	 * 
	 * @param  string $string   The string to perform the replacements on
	 * @param  mixed  $search   The string (or array of strings) to search for - see method description for details
	 * @param  mixed  $replace  The string (or array of strings) to replace with - see method description for details
	 * @return string  The input string with the specified replacements
	 */
	static public function replace($string, $search, $replace)
	{
		// str_replace() works on bytes, which is all this method needs - note
		// the different argument order
		$replaced = str_replace($search, $replace, $string);
		return $replaced;
	}
	
	
	/**
	 * Resets the configuration of the class
	 * 
	 * @internal
	 * 
	 * @return void
	 */
	static public function reset()
	{
		// Both values are detected lazily - ::clean() and ::checkMbString()
		// fill them in again the next time they are needed
		self::$can_ignore_invalid = NULL;
		self::$mbstring_available = NULL;
	}
	
	
	/**
	 * Reverses a string
	 * 
	 * @param  string $string   The string to reverse
	 * @return string  The reversed string
	 */
	static public function rev($string)
	{
		$len   = strlen($string);
		$chars = array();
		
		// The string is split into whole characters, using the lead byte of
		// each to know how many bytes belong to it
		for ($i=0; $i<$len; $i+=$char_len) {
			$byte = ord($string[$i]);
			
			if ($byte < 0x80) {
				$char_len = 1;
			} elseif ($byte >= 0xF0) {
				$char_len = 4;
			} elseif ($byte >= 0xE0) {
				$char_len = 3;
			} elseif ($byte >= 0xC0) {
				$char_len = 2;
			} else {
				// A continuation byte with no lead byte is dropped
				$char_len = 1;
				continue;
			}
			
			// substr() will not read past the end of a truncated sequence
			$chars[] = substr($string, $i, $char_len);
		}
		
		return implode('', array_reverse($chars));
	}
	
	
	/**
	 * Finds the last position (in characters) of the search value in the string
	 * 
	 * @param  string  $haystack  The string to search in
	 * @param  string  $needle    The string to search for.
	 * @param  integer $offset    The character position to start searching from. A negative value will stop looking that many characters from the end of the string
	 * @return mixed  The integer character position of the last occurence of the needle or `FALSE` if no match
	 */
	static public function rpos($haystack, $needle, $offset=0)
	{
		// We get better performance falling back for ASCII strings
		if (!self::detect($haystack)) {
			return strrpos($haystack, $needle, $offset);
		}
		
		// We don't even bother trying mb_strrpos since this method is faster
		
		// The search is done on bytes, so the character offset is converted first
		$offset = self::convertOffsetToBytes($haystack, $offset);
		
		$position = strrpos($haystack, $needle, $offset);
		
		if ($position === FALSE) {
			return FALSE;
		}
		
		// The number of characters in front of the match is its character
		// position. ::len() is used instead of the deprecated utf8_decode().
		return self::len(substr($haystack, 0, $position));
	}
	
	
	/**
	 * Trims whitespace, or any specified characters, from the end of a string
	 * 
	 * @param  string $string    The string to trim
	 * @param  string $charlist  The characters to trim
	 * @return string  The trimmed string
	 */
	static public function rtrim($string, $charlist=NULL)
	{
		// No character list means the default whitespace trimming. The cast
		// keeps a NULL $charlist from being passed to strlen().
		if (strlen((string) $charlist) === 0) {
			return rtrim($string);
		}
		
		$search = preg_quote($charlist, '#');
		// A literal hyphen has to be escaped exactly once inside the character
		// class. preg_quote() already escapes it as of PHP 5.3, so that escape
		// is removed before ours is added - otherwise the result is "\\-",
		// an escaped backslash followed by a bare hyphen.
		$search = str_replace('\\-', '-', $search);
		$search = str_replace('-', '\\-', $search);
		// A ".." in the character list indicates a range
		$search = str_replace('\.\.', '-', $search);
		return preg_replace('#[' . $search . ']+$#Du', '', $string);
	}
	
	
	/**
	 * Matches a string needle in the string haystack, returning a substring from the beginning of the needle to the end of the haystack
	 * 
	 * Can optionally return the part of the haystack before the needle.
	 * 
	 * @param  string  $haystack       The string to search in
	 * @param  string  $needle         The string to search for
	 * @param  boolean $before_needle  If a substring of the haystack before the needle should be returned instead of the substring from the needle to the end of the haystack
	 * @return mixed  The specified part of the haystack, or `FALSE` if the needle was not found
	 */
	static public function str($haystack, $needle, $before_needle=FALSE)
	{
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		$use_mbstring = self::$mbstring_available && function_exists('mb_strstr');
		if ($use_mbstring) {
			return mb_strstr($haystack, $needle, $before_needle, 'UTF-8');
		}
		
		// Without mbstring the haystack is split at the byte position of the needle
		$byte_position = strpos($haystack, $needle);
		
		if ($byte_position === FALSE) {
			return FALSE;
		}
		
		return $before_needle
			? substr($haystack, 0, $byte_position)
			: substr($haystack, $byte_position);
	}
	
	
	/**
	 * Extracts part of a string
	 * 
	 * @param  string  $string  The string to extract from
	 * @param  integer $start   The zero-based starting index to extract from. Negative values will start the extraction that many characters from the end of the string.
	 * @param  integer $length  The length of the string to extract. If an empty value is provided, the remainder of the string will be returned.
	 * @return mixed  The extracted substring or `FALSE` if the start is out of bounds
	 */
	static public function sub($string, $start, $length=NULL)
	{
		if (self::$mbstring_available === NULL) {
			self::checkMbString();
		}
		
		if (self::$mbstring_available) {
			$str_len = mb_strlen($string, 'UTF-8');
			if (abs($start) > $str_len) {
				return FALSE;
			}
			if ($length === NULL) {
				if ($start >= 0) {
					$length = $str_len-$start;
				} else {
					$length = abs($start);
				}
			}
			return mb_substr($string, $start, $length, 'UTF-8');
		}
		
		$is_ascii = !self::detect($string);
		$str_len  = $is_ascii ? strlen($string) : self::len($string);
		
		// The bounds are checked the same way for every code path
		if (abs($start) > $str_len) {
			return FALSE;
		}
		
		// Starting right at the end of the string leaves nothing to extract.
		// This has to be handled before the negative-start optimization
		// below, which would otherwise turn the start into 0 and return the
		// whole string.
		if ($start == $str_len) {
			return '';
		}
		
		// We get better performance falling back for ASCII strings
		if ($is_ascii) {
			if ($length === NULL) {
				if ($start >= 0) {
					$length = $str_len-$start;
				} else {
					$length = abs($start);
				}
			}
			return substr($string, $start, $length);
		}
		
		// This is the slowest version
		
		// Optimize looking by changing to negative start positions if the
		// start is in the second half of the string
		if ($start > $str_len/2) {
			$start = $start-$str_len;
		}
		
		// Substrings to the end of the string are pretty simple
		$string = substr($string, self::convertOffsetToBytes($string, $start));
		
		if ($length === NULL) {
			return $string;
		}
		
		// The length is measured in characters from the new start of the string
		return substr($string, 0, self::convertOffsetToBytes($string, $length));
	}
	
	
	/**
	 * Trims whitespace, or any specified characters, from the beginning and end of a string
	 * 
	 * @param  string $string    The string to trim
	 * @param  string $charlist  The characters to trim, .. indicates a range
	 * @return string  The trimmed string
	 */
	static public function trim($string, $charlist=NULL)
	{
		// No character list means the default whitespace trimming. The cast
		// keeps a NULL $charlist from being passed to strlen().
		if (strlen((string) $charlist) === 0) {
			return trim($string);
		}
		
		$search = preg_quote($charlist, '#');
		// A literal hyphen has to be escaped exactly once inside the character
		// class. preg_quote() already escapes it as of PHP 5.3, so that escape
		// is removed before ours is added - otherwise the result is "\\-",
		// an escaped backslash followed by a bare hyphen.
		$search = str_replace('\\-', '-', $search);
		$search = str_replace('-', '\\-', $search);
		// A ".." in the character list indicates a range
		$search = str_replace('\.\.', '-', $search);
		return preg_replace('#^[' . $search . ']+|[' . $search . ']+$#Du', '', $string);
	}
	
	
	/**
	 * Converts the first character of the string to uppercase.
	 * 
	 * @param  string $string  The string to process
	 * @return string  The processed string
	 */
	static public function ucfirst($string)
	{
		// Split off the first character so only it gets uppercased
		$first_character = self::sub($string, 0, 1);
		$remainder       = self::sub($string, 1);
		
		return self::upper($first_character) . $remainder;
	}
	
	
	/**
	 * Converts the first character of every word to uppercase
	 * 
	 * Words are considered to start at the beginning of the string, or after any
	 * whitespace character.
	 * 
	 * @param  string $string  The string to process
	 * @return string  The processed string
	 */
	static public function ucwords($string)
	{
		// The look-behind picks out the character that follows the start of
		// the string, whitespace, or one of a number of punctuation characters
		$pattern = '#(?<=^|\s|[\x{2000}-\x{200A}]|/|-|\(|\[|\{|\||"|^\'|\s\'|‘|“)(.)#u';
		
		// __CLASS__ is used for the callback since callables that use the
		// string "self" are deprecated as of PHP 8.2
		return preg_replace_callback(
			$pattern,
			array(__CLASS__, 'ucwordsCallback'),
			$string
		);
	}
	
	
	/**
	 * Handles converting a character to uppercase for ::ucwords()
	 * 
	 * @param array $match  The regex match from ::ucwords()
	 * @return string  The uppercase character
	 */
	static private function ucwordsCallback($match)
	{
		// Index 1 is the single character captured by the pattern in ::ucwords()
		$character = $match[1];
		return self::upper($character);
	}
	
	
	/**
	 * Converts all lowercase characters to uppercase
	 * 
	 * @param  string $string  The string to convert
	 * @return string  The input string with all lowercase characters in uppercase
	 */
	static public function upper($string)
	{
		// Strings with non-ASCII bytes need UTF-8 aware uppercasing
		if (self::detect($string)) {
			if (self::$mbstring_available === NULL) {
				self::checkMbString();
			}
			
			// Without mbstring the class' own translation table is used
			if (!self::$mbstring_available) {
				return strtr($string, self::$lower_to_upper);
			}
			
			// mb_strtoupper() misses some characters, so a fix-up table is
			// applied to its output
			$uppered = mb_strtoupper($string, 'utf-8');
			return strtr($uppered, self::$mb_lower_to_upper_fix);
		}
		
		// Pure ASCII strings can use the built-in function
		return strtoupper($string);
	}
	
	
	/**
	 * Wraps a string to a specific character width
	 * 
	 * @param  string  $string  The string to wrap
	 * @param  integer $width	The character width to wrap to
	 * @param  string  $break   The string to insert as a break
	 * @param  boolean $cut     If words longer than the character width should be split to fit
	 * @return string  The input string wrapped to the specified character width
	 */
	static public function wordwrap($string, $width=75, $break="\n", $cut=FALSE)
	{
		// We get better performance falling back for ASCII strings
		if (!self::detect($string)) {
			return wordwrap($string, $width, $break, $cut);
		}
		
		// Split right after every whitespace character, so each piece keeps its
		// trailing whitespace. The pattern must not carry the `e` modifier: it
		// only ever had meaning for preg_replace(), and PHP 7+ rejects the
		// pattern outright, which made preg_split() return FALSE here.
		$words = preg_split('#(?<=\s|[\x{2000}-\x{200A}])#u', $string);
		
		$output = '';
		
		// Number of characters already placed on the current output line
		$line_len = 0;
		foreach ($words as $word) {
			$word_len = self::len($word);
			
			// Shorten up words that are too long by emitting $width characters
			// at a time, each chunk on a line of its own
			while ($cut && $word_len > $width) {
				// Only break if something is already on the current line, so
				// a string starting with a long word gets no leading $break
				if ($line_len > 0) {
					$output .= $break;
				}
				$output  .= self::sub($word, 0, $width);
				$line_len = $width;
				$word     = self::sub($word, $width);
				$word_len = self::len($word);
			}
			
			// Start a new line when the word does not fit on a non-empty line
			if ($line_len && $line_len + $word_len > $width) {
				$output  .= $break;
				$line_len = 0;
			}
			$output   .= $word;
			$line_len += $word_len;
		}
		
		return $output;
	}
	
	
	/**
	 * Forces use as a static class
	 * 
	 * @return fUTF8
	 */
	private function __construct()
	{
		// Intentionally empty - being private, this constructor can not be
		// called from outside of the class
	}
}



/**
 * Copyright (c) 2008-2011 Will Bond <will@flourishlib.com>
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */